This commit is contained in:
wea_ondara
2019-12-25 13:49:57 +01:00
parent 2c1524a335
commit 19f5835e3a
2 changed files with 103 additions and 84 deletions

View File

@@ -54,8 +54,21 @@ def main(folder, intervl):
gpos = []
gcom = []
goutfilenamenewusers = outputdir + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
goutfilenameoldusers = outputdir + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
goutfilenamenewusers = outputdir + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") + "_i" + str(intervl)
goutfilenameoldusers = outputdir + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") + "_i" + str(intervl)
start = cms()
printnoln("sorting posts ...")
sortedposts = defaultdict(list)
for (i, post) in enumerate(newposts):
userid = post['OwnerUserId']
# check first contribution
if firstcontrib[userid] + timedelta(days=DAYS_NEW_USER) < post['CreationDate']:
continue
sortedposts[userid].append(post)
rprint("sorting posts ... took " + str(cms() - start) + "ms")
for option_posts in postcounts:
# print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + " - #posts: " + str(option_posts))
@@ -64,22 +77,8 @@ def main(folder, intervl):
start = cms()
printnoln("computing toxic levels: filtering")
toxlevels = []
searchedposts = defaultdict(int)
filteredposts = []
for (i, post) in enumerate(newposts):
userid = post['OwnerUserId']
# check first contribution
if firstcontrib[userid] + timedelta(days=DAYS_NEW_USER) < post['CreationDate']:
continue
# no more than option_posts posts from one user
searchedposts[userid] += 1
if searchedposts[userid] > option_posts:
continue
filteredposts.append(post)
filteredposts = [posts for (_, posts) in sortedposts.items() if len(posts) == option_posts]
filteredposts = [p for posts in filteredposts for p in posts]
for (i, post) in enumerate(filteredposts):
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
for a in post['Answers']:
@@ -87,6 +86,7 @@ def main(folder, intervl):
toxlevel = cachedsentiments[a['Id']]
else:
print("Sentiment not found for " + a['Id'])
continue
toxlevels.append(toxlevel)
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
@@ -206,10 +206,10 @@ def main(folder, intervl):
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
magickold += " " + goutfilenameoldusers + ".png"
os.system(magickglobal + " " + outputdir + "batch_newusers.pdf")
os.system(magickold + " " + outputdir + "batch_oldusers.pdf")
os.system(magickglobal + " " + outputdir + "batch_newusers_i" + str(intervl) + ".pdf")
os.system(magickold + " " + outputdir + "batch_oldusers_i" + str(intervl) + ".pdf")
for (i, cmd) in magickpost.items():
os.system(cmd + " " + outputdir + "batch_newusers_" + str(i) + ".pdf")
os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf")
def dumptoxlevels(lvls, filename):