This commit is contained in:
wea_ondara
2019-07-18 13:58:27 +02:00
parent bca211551c
commit 3d4b1f26ba
3 changed files with 31 additions and 40 deletions

View File

@@ -29,11 +29,11 @@ def main(folder):
postcounts = range(1, 5 + 1)
for (option_date_from, option_date_to) in intervals:
# filter users by option_date_from <= creation date <= option_date_to
newusers = dmt(users).filter(lambda u: option_date_from <= u['CreationDate'] < option_date_to, "filtering users by creation").getresults()
newuserids = set(dmt(newusers).map(lambda u: u['Id'], "get user id list").getresults())
# newusers = dmt(users).filter(lambda u: option_date_from <= u['CreationDate'] < option_date_to, "filtering users by creation").getresults()
# newuserids = set(dmt(newusers).map(lambda u: u['Id'], "get user id list").getresults())
# get questions for filtered users
newposts = dmt(posts).filter(lambda p: p['OwnerUserId'] in newuserids, "filter posts by selected users").getresults()
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filter posts by dates").getresults()
if len(newposts) == 0:
continue
print("computing toxic levels: " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
@@ -48,7 +48,8 @@ def main(folder):
gpos = []
gcom = []
outfolder = "output/batch/" + folder.split("/")[-1] + "/"
outfolder = folder + "/output/batch/"
os.system("mkdir -p " + outfolder)
goutfilename = outfolder + "batch_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
for option_posts in postcounts:
@@ -90,7 +91,6 @@ def main(folder):
rprint("computing toxic levels: post #" + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... took " + str(cms() - start) + "ms")
outfilename = goutfilename + "_" + str(option_posts)
os.system("mkdir -p " + outfolder)
dumptoxlevels(toxlevels, outfilename + ".py")
neglevelsflat = [item['neg'] for item in flatmap(toxlevels.values())]
@@ -118,7 +118,7 @@ def main(folder):
axs[1, 1].set_yscale('log')
# plt.show()
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts\nUsers registered between "
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between "
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
fig.savefig(outfilename + ".png", bbox_inches='tight')
plt.close(fig)
@@ -136,7 +136,7 @@ def main(folder):
gaxs[1, 0].set_yscale('log')
gaxs[0, 1].set_yscale('log')
gaxs[1, 1].set_yscale('log')
gfig.suptitle("Sentiment of answers to the first X (max) posts\nUsers registered between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
gfig.suptitle("Sentiment of answers to the first X (max) posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
gfig.savefig(goutfilename + ".png", bbox_inches='tight')
plt.close(gfig)

View File

@@ -13,20 +13,18 @@ colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
def main(folder):
if folder.endswith("/"):
folder = folder[:-1]
onlyfiles = [f for f in listdir(folder)]
onlyfiles = [f for f in onlyfiles if isfile(join(folder, f))]
onlyfiles = [f for f in onlyfiles if f.endswith(".py")]
# onlyfiles = [f[:-3] for f in onlyfiles]
# onlyfiles = [f.replace(".", "\.") for f in onlyfiles]
outputdir = folder + "/output/ksbatch/"
os.system("mkdir -p " + outputdir)
folder = folder + "/output/batch/"
onlyfiles = [folder + f for f in listdir(folder) if isfile(join(folder, f)) and f.endswith(".py")]
onlyfiles = sorted(onlyfiles)
plotbypost(onlyfiles)
plotbydate(onlyfiles)
plotbypost(onlyfiles, outputdir)
plotbydate(onlyfiles, outputdir)
def plotbypost(onlyfiles):
def plotbypost(onlyfiles, outputdir):
files = defaultdict(list)
for f in onlyfiles:
s = f[:-3].split("_")
@@ -43,8 +41,8 @@ def plotbypost(onlyfiles):
continue
print(p)
for i in range(len(l) - 1):
tox1 = imprt(folder + "/" + l[i]).toxlevels
tox2 = imprt(folder + "/" + l[i + 1]).toxlevels
tox1 = imprt(l[i]).toxlevels
tox2 = imprt(l[i + 1]).toxlevels
neglevelsflat1 = [item['neg'] for item in flatmap(tox1.values())]
neulevelsflat1 = [item['neu'] for item in flatmap(tox1.values())]
@@ -67,7 +65,7 @@ def plotbypost(onlyfiles):
changes_com[p].append(kscom)
for (p, l) in files.items():
with open(folder + "/ks_" + str(p) + ".log", "w") as f:
with open(outputdir + "/ks_post_" + str(p) + ".log", "w") as f:
for i in range(len(l) - 1):
f1 = l[i]
f2 = l[i + 1]
@@ -88,7 +86,7 @@ def plotbypost(onlyfiles):
plt.title("KS 2-sided test with max " + str(p) + " posts")
plt.xticks(rotation=90)
plt.legend(loc="upper right")
plt.savefig(folder + "/ks_pval_" + str(p) + ".png", bbox_inches='tight')
plt.savefig(outputdir + "/ks_post_pval_" + str(p) + ".png", bbox_inches='tight')
plt.close(fig)
# stat
@@ -106,11 +104,11 @@ def plotbypost(onlyfiles):
plt.title("KS 2-sided test with max " + str(p) + " posts")
plt.xticks(rotation=90)
plt.legend(loc="upper right")
plt.savefig(folder + "/ks_stat_" + str(p) + ".png", bbox_inches='tight')
plt.savefig(outputdir + "/ks_post_stat_" + str(p) + ".png", bbox_inches='tight')
plt.close(fig)
def plotbydate(onlyfiles):
def plotbydate(onlyfiles, outputdir):
files = defaultdict(list)
for f in onlyfiles:
s = f[:-3].split("_")
@@ -127,8 +125,8 @@ def plotbydate(onlyfiles):
continue
print(d)
for i in range(len(l) - 1):
tox1 = imprt(folder + "/" + l[i]).toxlevels
tox2 = imprt(folder + "/" + l[i + 1]).toxlevels
tox1 = imprt(l[i]).toxlevels
tox2 = imprt(l[i + 1]).toxlevels
neglevelsflat1 = [item['neg'] for item in flatmap(tox1.values())]
neulevelsflat1 = [item['neu'] for item in flatmap(tox1.values())]
@@ -151,7 +149,7 @@ def plotbydate(onlyfiles):
changes_com[d].append(kscom)
for (d, l) in files.items():
with open(folder + "/ks_" + d[0] + "_" + d[1] + ".log", "w") as f:
with open(outputdir + "/ks_date_" + d[0] + "_" + d[1] + ".log", "w") as f:
for i in range(len(l) - 1):
f1 = l[i]
f2 = l[i + 1]
@@ -173,7 +171,7 @@ def plotbydate(onlyfiles):
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
plt.xticks(rotation=90)
plt.legend(loc="upper right")
plt.savefig(folder + "/ks_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.savefig(outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.close(fig)
# stat
@@ -191,7 +189,7 @@ def plotbydate(onlyfiles):
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
plt.xticks(rotation=90)
plt.legend(loc="upper right")
plt.savefig(folder + "/ks_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.savefig(outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.close(fig)

View File

@@ -28,8 +28,8 @@ def main(folder):
print((option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y")))
# filter posts by option_date_from <= creation date <= option_date_to
newusers = set(dmt(users).filter(lambda u: option_date_from <= u['CreationDate'] < option_date_to, "filtering users by creation").map(lambda u: u['Id'], "getting user ids").getresults())
newposts = dmt(posts).filter(lambda p: p['OwnerUserId'] in newusers, "filtering posts by users").getresults()
# newusers = set(dmt(users).filter(lambda u: option_date_from <= u['CreationDate'] < option_date_to, "filtering users by creation").map(lambda u: u['Id'], "getting user ids").getresults())
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
postcounts = defaultdict(list)
i = 0
@@ -39,16 +39,9 @@ def main(folder):
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
# print("i: " + str(i) + " expected: " + str(len(newposts)) + " is: " + str(sum([pc for pc in postcounts.values()])))
os.system("mkdir -p " + folder + "/output")
histfilename = folder + "/output/posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
countfilename = folder + "/output/postcount_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
# fig = plt.figure(figsize=(16, 12))
# plt.plot(userids, [len(pc) for pc in postcounts])
# plt.title("Post count for users between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y"))
# plt.xticks(rotation=90)
# fig.savefig(countfilename + ".png", bbox_inches='tight')
# plt.close(fig)
outputdir = folder + "/output/posthist/"
os.system("mkdir -p " + outputdir)
histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
histdata = [pc for pc in postcounts.values()]
fig = plt.figure(figsize=(16, 12))