wip
This commit is contained in:
67
posthist.py
67
posthist.py
@@ -6,7 +6,7 @@ from datetime import timedelta
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
|
||||
from common import calc_intervals, IMAGE_MAGICK, DAYS_NEW_USER
|
||||
from common import calc_intervals, IMAGE_MAGICK, DAYS_NEW_USER, FIG_SIZE
|
||||
from loader import load, dmt
|
||||
from sentiments import readtoxleveltxt
|
||||
|
||||
@@ -22,9 +22,12 @@ def main(folder, intervl):
|
||||
os.system("mkdir -p " + outputdir)
|
||||
|
||||
activeusercounts = []
|
||||
newusercounts = []
|
||||
answerstonewusers = []
|
||||
sentimentstonewusers = []
|
||||
activitynewusers = []
|
||||
questionsininterval = []
|
||||
answersininterval = []
|
||||
imgmagickcmd = IMAGE_MAGICK
|
||||
for (option_date_from, option_date_to) in intervals:
|
||||
print(option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y"))
|
||||
@@ -32,6 +35,10 @@ def main(folder, intervl):
|
||||
# post histograms
|
||||
# filter posts by option_date_from <= creation date < option_date_to (end of interval is exclusive)
|
||||
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
|
||||
questionsininterval.append(((option_date_from, option_date_to), len(newposts)))
|
||||
newanswers = dmt(posts).map(lambda p: [a for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to], "filtering answers by date") \
|
||||
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []).getresults()
|
||||
answersininterval.append(((option_date_from, option_date_to), len(newanswers)))
|
||||
|
||||
postcounts = defaultdict(list)
|
||||
i = 0
|
||||
@@ -42,6 +49,7 @@ def main(folder, intervl):
|
||||
# postcounts[p['OwnerUserId']].append(a)
|
||||
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
|
||||
activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys())))
|
||||
newusercounts.append(((option_date_from, option_date_to), len([u for u in users if option_date_from <= u['CreationDate'] < option_date_to])))
|
||||
|
||||
activitynewusersinmonth = defaultdict(int)
|
||||
for p in newposts:
|
||||
@@ -57,7 +65,7 @@ def main(folder, intervl):
|
||||
histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") + "-i" + str(intervl)
|
||||
|
||||
histdata = [pc for pc in postcounts.values()]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
fig = plt.figure(figsize=FIG_SIZE)
|
||||
plt.hist(histdata, range(max(histdata, default=0) + 1))
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=0)
|
||||
@@ -71,9 +79,9 @@ def main(folder, intervl):
|
||||
|
||||
# answers to new users
|
||||
answers = (dmt(posts).map(lambda q: [a for a in q['Answers']
|
||||
if option_date_from <= a['CreationDate'] < option_date_to # answer in interval
|
||||
and firstcontrib[q['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > q['CreationDate'] # post created within 1 week of 1st contrib
|
||||
and q['CreationDate'] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer created within 1 week of post
|
||||
if option_date_from <= a['CreationDate'] < option_date_to # answer in interval
|
||||
and firstcontrib[q['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > q['CreationDate'] # post created within 1 week of 1st contrib
|
||||
and q['CreationDate'] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer created within 1 week of post
|
||||
.getresults())
|
||||
count = sum([len(a) for a in answers])
|
||||
answerstonewusers.append(((option_date_from, option_date_to), count))
|
||||
@@ -87,32 +95,34 @@ def main(folder, intervl):
|
||||
os.system(imgmagickcmd + " " + outputdir + "/posthist-i" + str(intervl) + ".pdf")
|
||||
|
||||
# plot posts diagram
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in activeusercounts], [y for (x, y) in activeusercounts])
|
||||
fig = plt.figure(figsize=FIG_SIZE)
|
||||
plt.plot([u[0] for (u, y) in activeusercounts], [y for (u, y) in activeusercounts], label="active users")
|
||||
plt.plot([u[0] for (u, y) in newusercounts], [y for (u, y) in newusercounts], label='newly registered users')
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('#active users')
|
||||
plt.ylabel('#users')
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=1)
|
||||
# plt.ylim(bottom=1)
|
||||
plt.title("Active users")
|
||||
plt.legend(loc="upper right")
|
||||
fig.savefig(outputdir + "activeusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
# plot answers to new users diagram
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in answerstonewusers], [y for (x, y) in answerstonewusers])
|
||||
fig = plt.figure(figsize=FIG_SIZE)
|
||||
plt.plot([u[0] for (u, y) in answerstonewusers], [y for (u, y) in answerstonewusers])
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('#answers per question of a new user')
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=1)
|
||||
# plt.ylim(bottom=1)
|
||||
plt.title("Answers to new users")
|
||||
fig.savefig(outputdir + "answerstonewusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
# plot sentiments of answers to new users diagram
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [b for (x, [y, b, n, g]) in sentimentstonewusers], label="Neg. answer")
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [n for (x, [y, b, n, g]) in sentimentstonewusers], label="Neu. answer")
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [g for (x, [y, b, n, g]) in sentimentstonewusers], label="Pos. answer")
|
||||
fig = plt.figure(figsize=FIG_SIZE)
|
||||
plt.plot([u[0] for (u, y) in sentimentstonewusers], [b for (u, [y, b, n, g]) in sentimentstonewusers], label="Neg. answer")
|
||||
plt.plot([u[0] for (u, y) in sentimentstonewusers], [n for (u, [y, b, n, g]) in sentimentstonewusers], label="Neu. answer")
|
||||
plt.plot([u[0] for (u, y) in sentimentstonewusers], [g for (u, [y, b, n, g]) in sentimentstonewusers], label="Pos. answer")
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('sentiment')
|
||||
plt.yscale('log')
|
||||
@@ -123,8 +133,8 @@ def main(folder, intervl):
|
||||
plt.close(fig)
|
||||
|
||||
# plot activity for new users
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in activitynewusers], [y for x, y in activitynewusers], label="activity")
|
||||
fig = plt.figure(figsize=FIG_SIZE)
|
||||
plt.plot([u[0] for (u, y) in activitynewusers], [y for u, y in activitynewusers], label="activity")
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('#questions or answers created by a new user')
|
||||
plt.legend(loc="upper right")
|
||||
@@ -132,6 +142,27 @@ def main(folder, intervl):
|
||||
fig.savefig(outputdir + "activitynewusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
# plot number of questions and answers per interval
|
||||
fig = plt.figure(figsize=FIG_SIZE)
|
||||
plt.plot([u[0] for (u, y) in questionsininterval], [y for u, y in questionsininterval], label="questions")
|
||||
plt.plot([u[0] for (u, y) in answersininterval], [y for u, y in answersininterval], label="answer")
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('quantity')
|
||||
plt.legend(loc="upper right")
|
||||
plt.title("Average activity per new user")
|
||||
fig.savefig(outputdir + "postsanswers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
# print data set stats
|
||||
print("users: " + str(len(users)))
|
||||
print("questions: " + str(len(posts)))
|
||||
print("answers: " + str(sum(dmt(posts).map(lambda q: len(q['Answers'])).getresults())))
|
||||
print("active user last month: " + str(activeusercounts[-1]))
|
||||
useridmapping = {u['Id']: u for u in users}
|
||||
newuserposts = dmt(posts).filter(lambda q: q['CreationDate'] < useridmapping[q['OwnerUserId']]['CreationDate'] + timedelta(days=DAYS_NEW_USER)).getresults()
|
||||
newuserlist = set([q['OwnerUserId'] for q in newuserposts])
|
||||
print("questions from new users: " + str(len(newuserposts)))
|
||||
print("questions from new users/new user: " + str(len(newuserposts) / len(newuserlist)))
|
||||
|
||||
if __name__ == "__main__":
|
||||
# execute only if run as a script
|
||||
|
||||
Reference in New Issue
Block a user