From 93555d9cbfa6f7416a0bdce7c0945a58a58d5fa1 Mon Sep 17 00:00:00 2001 From: wea_ondara Date: Sun, 9 Feb 2020 11:04:33 +0100 Subject: [PATCH] wip --- analyze_batch.py | 4 ++++ its.py | 7 +++++-- posthist.py | 11 +++++++---- votes.py | 7 ++++--- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/analyze_batch.py b/analyze_batch.py index 6dde225..1566322 100644 --- a/analyze_batch.py +++ b/analyze_batch.py @@ -90,6 +90,8 @@ def main(folder, intervl): for (i, post) in enumerate(filteredposts): printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts))) for a in post['Answers']: + if a['CreationDate'] > post['CreationDate'] + timedelta(days=DAYS_NEW_USER): + continue # if a['Id'] in cachedsentiments.keys(): toxlevel = cachedsentiments[a['Id']] # else: @@ -178,6 +180,8 @@ def main(folder, intervl): for (i, post) in enumerate(filteredposts): printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts))) for a in post['Answers']: + if a['CreationDate'] > post['CreationDate'] + timedelta(days=DAYS_NEW_USER): + continue # if a['Id'] in cachedsentiments.keys(): toxlevel = cachedsentiments[a['Id']] # else: diff --git a/its.py b/its.py index 88047ee..378fe92 100644 --- a/its.py +++ b/its.py @@ -42,8 +42,11 @@ def main(folder, intervl): print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) # avg sentiments filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound'] - for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to - and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) + for a in p['Answers'] + if option_date_from <= p['CreationDate'] < option_date_to #post in interval + and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate'] # post created within 1 week of 1st contrib + and p['CreationDate'] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer within 
1 week of post creation + .filter(lambda p: p != []) .reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []) .getresults()) diff --git a/posthist.py b/posthist.py index ee36c24..65c027a 100644 --- a/posthist.py +++ b/posthist.py @@ -43,10 +43,11 @@ def main(folder, intervl): postcounts = {id: len(pc) for (id, pc) in postcounts.items()} activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys()))) - activitynewusersinmonth = defaultdict(int) # TODO match month exactly + activitynewusersinmonth = defaultdict(int) for p in newposts: if firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate']: activitynewusersinmonth[p['OwnerUserId']] += 1 + for p in posts: for a in p['Answers']: if firstcontrib[a['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']: activitynewusersinmonth[p['OwnerUserId']] += 1 @@ -63,14 +64,16 @@ def main(folder, intervl): plt.xlabel("#posts") plt.ylabel("#users with X posts") fig.gca().xaxis.set_major_locator(MaxNLocator(integer=True)) - plt.title("Histogram for user post count registered between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y")) + plt.title("Histogram for user post count between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y")) fig.savefig(histfilename + ".png", bbox_inches='tight') plt.close(fig) imgmagickcmd += " " + histfilename + ".png" # answers to new users - answers = (dmt(posts).map(lambda q: [a for a in q['Answers'] if option_date_from <= a['CreationDate'] < option_date_to - and firstcontrib[q['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= a['CreationDate']]) + answers = (dmt(posts).map(lambda q: [a for a in q['Answers'] + if option_date_from <= a['CreationDate'] < option_date_to # answer in interval + and firstcontrib[q['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > q['CreationDate'] # post created within 1 week of 1st contrib + and q['CreationDate'] + 
timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer created within 1 week of post .getresults()) count = sum([len(a) for a in answers]) answerstonewusers.append(((option_date_from, option_date_to), count)) diff --git a/votes.py b/votes.py index 36cc8fb..2f163ea 100644 --- a/votes.py +++ b/votes.py @@ -36,12 +36,13 @@ def main(folder, intervl): print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) # avg sentiments scores = (dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to - and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) >= p['CreationDate']) + and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate']) .map(lambda p: p['Score']) .getresults()) filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound'] - for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to - and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) >= a['CreationDate']]) + for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to # post in interval + and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate'] # post within 1 week of 1st contrib + and p['CreationDate'] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']]) # answer within 1 week of post .filter(lambda p: p != []) .reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []) .getresults())