This commit is contained in:
wea_ondara
2020-02-09 11:04:33 +01:00
parent 75dd1064fa
commit 93555d9cbf
4 changed files with 20 additions and 9 deletions

View File

@@ -90,6 +90,8 @@ def main(folder, intervl):
for (i, post) in enumerate(filteredposts): for (i, post) in enumerate(filteredposts):
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts))) printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
for a in post['Answers']: for a in post['Answers']:
if a['CreationDate'] > post['CreationDate'] + timedelta(days=DAYS_NEW_USER):
continue
# if a['Id'] in cachedsentiments.keys(): # if a['Id'] in cachedsentiments.keys():
toxlevel = cachedsentiments[a['Id']] toxlevel = cachedsentiments[a['Id']]
# else: # else:
@@ -178,6 +180,8 @@ def main(folder, intervl):
for (i, post) in enumerate(filteredposts): for (i, post) in enumerate(filteredposts):
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts))) printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
for a in post['Answers']: for a in post['Answers']:
if a['CreationDate'] > post['CreationDate'] + timedelta(days=DAYS_NEW_USER):
continue
# if a['Id'] in cachedsentiments.keys(): # if a['Id'] in cachedsentiments.keys():
toxlevel = cachedsentiments[a['Id']] toxlevel = cachedsentiments[a['Id']]
# else: # else:

7
its.py
View File

@@ -42,8 +42,11 @@ def main(folder, intervl):
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
# avg sentiments # avg sentiments
filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound'] filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to for a in p['Answers']
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) if option_date_from <= p['CreationDate'] < option_date_to #post in interval
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate'] # post created within 1 week of 1st contrib
and p['CreationDate'] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer within 1 week of post creation
.filter(lambda p: p != []) .filter(lambda p: p != [])
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []) .reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
.getresults()) .getresults())

View File

@@ -43,10 +43,11 @@ def main(folder, intervl):
postcounts = {id: len(pc) for (id, pc) in postcounts.items()} postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys()))) activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys())))
activitynewusersinmonth = defaultdict(int) # TODO match month exactly activitynewusersinmonth = defaultdict(int)
for p in newposts: for p in newposts:
if firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate']: if firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate']:
activitynewusersinmonth[p['OwnerUserId']] += 1 activitynewusersinmonth[p['OwnerUserId']] += 1
for p in posts:
for a in p['Answers']: for a in p['Answers']:
if firstcontrib[a['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']: if firstcontrib[a['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']:
activitynewusersinmonth[p['OwnerUserId']] += 1 activitynewusersinmonth[p['OwnerUserId']] += 1
@@ -63,14 +64,16 @@ def main(folder, intervl):
plt.xlabel("#posts") plt.xlabel("#posts")
plt.ylabel("#users with X posts") plt.ylabel("#users with X posts")
fig.gca().xaxis.set_major_locator(MaxNLocator(integer=True)) fig.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
plt.title("Histogram for user post count registered between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y")) plt.title("Histogram for user post count between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y"))
fig.savefig(histfilename + ".png", bbox_inches='tight') fig.savefig(histfilename + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
imgmagickcmd += " " + histfilename + ".png" imgmagickcmd += " " + histfilename + ".png"
# answers to new users # answers to new users
answers = (dmt(posts).map(lambda q: [a for a in q['Answers'] if option_date_from <= a['CreationDate'] < option_date_to answers = (dmt(posts).map(lambda q: [a for a in q['Answers']
and firstcontrib[q['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= a['CreationDate']]) if option_date_from <= a['CreationDate'] < option_date_to # answer in interval
and firstcontrib[q['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > q['CreationDate'] # post created within 1 week of 1st contrib
and q['CreationDate'] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer created within 1 week of post
.getresults()) .getresults())
count = sum([len(a) for a in answers]) count = sum([len(a) for a in answers])
answerstonewusers.append(((option_date_from, option_date_to), count)) answerstonewusers.append(((option_date_from, option_date_to), count))

View File

@@ -36,12 +36,13 @@ def main(folder, intervl):
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
# avg sentiments # avg sentiments
scores = (dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to scores = (dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) >= p['CreationDate']) and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate'])
.map(lambda p: p['Score']) .map(lambda p: p['Score'])
.getresults()) .getresults())
filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound'] filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to # post in interval
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) >= a['CreationDate']]) and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate'] # post within 1 week of 1st contrib
and p['CreationDate'] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer within 1 week of post
.filter(lambda p: p != []) .filter(lambda p: p != [])
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []) .reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
.getresults()) .getresults())