This commit is contained in:
wea_ondara
2019-08-13 18:24:41 +02:00
parent 0536f5db5f
commit e032a1f35b
3 changed files with 39 additions and 10 deletions

View File

@@ -20,7 +20,11 @@ def main(folder, intervl):
users, posts, firstcontrib, sumcontrib = load(folder) users, posts, firstcontrib, sumcontrib = load(folder)
intervals = calc_intervals(posts, intervl) intervals = calc_intervals(posts, intervl)
start = cms()
printnoln("reading sentiments ...")
cachedsentiments = imprt(folder + "/output/sentiments.py").answers cachedsentiments = imprt(folder + "/output/sentiments.py").answers
rprint("reading sentiments ... took " + str(cms() - start) + "ms")
outputdir = folder + "/output/batch/" outputdir = folder + "/output/batch/"
os.system("mkdir -p " + outputdir) os.system("mkdir -p " + outputdir)
@@ -100,9 +104,9 @@ def main(folder, intervl):
gcom.append(comlevelsflat) gcom.append(comlevelsflat)
fig, axs = plt.subplots(2, 2, figsize=(16, 12)) fig, axs = plt.subplots(2, 2, figsize=(16, 12))
axs[0, 0].set_title('Neg') axs[0, 0].set_title('Negativity')
axs[1, 0].set_title('Neu') axs[1, 0].set_title('Neutrality')
axs[0, 1].set_title('Pos') axs[0, 1].set_title('Positivity')
axs[1, 1].set_title('Compound') axs[1, 1].set_title('Compound')
axs[0, 0].hist(neglevelsflat, np.linspace(0, 1, 1 * 100)) axs[0, 0].hist(neglevelsflat, np.linspace(0, 1, 1 * 100))
axs[1, 0].hist(neulevelsflat, np.linspace(0, 1, 1 * 100)) axs[1, 0].hist(neulevelsflat, np.linspace(0, 1, 1 * 100))
@@ -115,7 +119,7 @@ def main(folder, intervl):
# plt.show() # plt.show()
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between " fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between "
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
# figsaver.save(fig, outfilename + ".png", bbox_inches='tight') # figsaver.save(fig, outfilename + ".png", bbox_inches='tight')
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...") printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
fig.savefig(outfilename + ".png", bbox_inches='tight') fig.savefig(outfilename + ".png", bbox_inches='tight')
@@ -194,7 +198,8 @@ def main(folder, intervl):
axs[1, 1].set_yscale('log') axs[1, 1].set_yscale('log')
# plt.show() # plt.show()
fig.suptitle("Sentiment of answers to posts by most posting users (95%tile)\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " +
option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...") printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight') fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)

View File

@@ -80,6 +80,7 @@ def plotbypost(onlyfiles, outputdir):
f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[p][i]) + "; ks neu = " + str(changes_neu[p][i]) f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[p][i]) + "; ks neu = " + str(changes_neu[p][i])
+ "; ks pos = " + str(changes_pos[p][i]) + "; ks com = " + str(changes_com[p][i]) + "\n") + "; ks pos = " + str(changes_pos[p][i]) + "; ks com = " + str(changes_com[p][i]) + "\n")
# pval # pval
magick = IMAGE_MAGICK
for (p, l) in files.items(): for (p, l) in files.items():
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)] x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12)) fig = plt.figure(figsize=(16, 12))
@@ -93,11 +94,16 @@ def plotbypost(onlyfiles, outputdir):
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o') plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with max " + str(p) + " posts") plt.title("KS 2-sided test with max " + str(p) + " posts")
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.xlabel("Comparision: time frame X - time frame X+1")
plt.ylabel("p-value")
plt.legend(loc="upper right") plt.legend(loc="upper right")
plt.savefig(outputdir + "/ks_post_pval_" + str(p) + ".png", bbox_inches='tight') plt.savefig(outputdir + "/ks_post_pval_" + str(p) + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
magick += " " + outputdir + "/ks_post_pval_" + str(p) + ".png"
os.system(magick + " " + outputdir + "/ks_post_pval.pdf")
# stat # stat
magick = IMAGE_MAGICK
for (p, l) in files.items(): for (p, l) in files.items():
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)] x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12)) fig = plt.figure(figsize=(16, 12))
@@ -111,9 +117,13 @@ def plotbypost(onlyfiles, outputdir):
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o') plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with max " + str(p) + " posts") plt.title("KS 2-sided test with max " + str(p) + " posts")
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.xlabel("Comparision: time frame X - time frame X+1")
plt.ylabel("stat value")
plt.legend(loc="upper right") plt.legend(loc="upper right")
plt.savefig(outputdir + "/ks_post_stat_" + str(p) + ".png", bbox_inches='tight') plt.savefig(outputdir + "/ks_post_stat_" + str(p) + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
magick += " " + outputdir + "/ks_post_stat_" + str(p) + ".png"
os.system(magick + " " + outputdir + "/ks_post_stat.pdf")
def plotbydate(onlyfiles, outputdir): def plotbydate(onlyfiles, outputdir):
@@ -122,6 +132,8 @@ def plotbydate(onlyfiles, outputdir):
for f in onlyfiles: for f in onlyfiles:
s = f[:-3].split("_") s = f[:-3].split("_")
files[(s[3], s[4])].append(f) files[(s[3], s[4])].append(f)
dates = sorted(files.keys(), key=lambda e: "-".join(reversed(e[0].split("-"))))
files = {d: files[d] for d in dates}
files = {d: sorted(l, key=lambda e: e.split("_")[5]) for (d, l) in files.items()} files = {d: sorted(l, key=lambda e: e.split("_")[5]) for (d, l) in files.items()}
changes_neg = defaultdict(list) changes_neg = defaultdict(list)
@@ -166,6 +178,7 @@ def plotbydate(onlyfiles, outputdir):
+ "; ks pos = " + str(changes_pos[d][i]) + "; ks com = " + str(changes_com[d][i]) + "\n") + "; ks pos = " + str(changes_pos[d][i]) + "; ks com = " + str(changes_com[d][i]) + "\n")
# pval # pval
magick = IMAGE_MAGICK
for (d, l) in files.items(): for (d, l) in files.items():
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)] x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12)) fig = plt.figure(figsize=(16, 12))
@@ -179,11 +192,16 @@ def plotbydate(onlyfiles, outputdir):
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o') plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1]) plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
plt.ylabel("p-value")
plt.legend(loc="upper right") plt.legend(loc="upper right")
plt.savefig(outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight') plt.savefig(outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
magick += " " + outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png"
os.system(magick + " " + outputdir + "/ks_date_pval.pdf")
# stat # stat
magick = IMAGE_MAGICK
for (d, l) in files.items(): for (d, l) in files.items():
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)] x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12)) fig = plt.figure(figsize=(16, 12))
@@ -197,9 +215,13 @@ def plotbydate(onlyfiles, outputdir):
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o') plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1]) plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
plt.ylabel("stat value")
plt.legend(loc="upper right") plt.legend(loc="upper right")
plt.savefig(outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight') plt.savefig(outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
magick += " " + outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png"
os.system(magick + " " + outputdir + "/ks_date_stat.pdf")
def plotbydateold(onlyfiles, oldfiles, outputdir): def plotbydateold(onlyfiles, oldfiles, outputdir):
@@ -253,11 +275,6 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
print("logs") print("logs")
for (d, l) in files.items(): for (d, l) in files.items():
# print(d)
# print("neg is: " + str(len(changes_neg[d])) + " should: " + str(len(l)))
# print("neu is: " + str(len(changes_neu[d])) + " should: " + str(len(l)))
# print("pos is: " + str(len(changes_pos[d])) + " should: " + str(len(l)))
# print("com is: " + str(len(changes_com[d])) + " should: " + str(len(l)))
f1 = oldfiles[d] f1 = oldfiles[d]
with open(outputdir + "/ks_olddate_" + d[0] + "_" + d[1] + ".log", "w") as f: with open(outputdir + "/ks_olddate_" + d[0] + "_" + d[1] + ".log", "w") as f:
for i in range(len(l)): for i in range(len(l)):
@@ -286,6 +303,8 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o') plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1]) plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1])
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
plt.ylabel("p-value")
plt.legend(loc="upper right") plt.legend(loc="upper right")
outfile = outputdir + "/ks_olddate_pval_" + d[0] + "_" + d[1] + ".png" outfile = outputdir + "/ks_olddate_pval_" + d[0] + "_" + d[1] + ".png"
plt.savefig(outfile, bbox_inches='tight') plt.savefig(outfile, bbox_inches='tight')
@@ -312,6 +331,8 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o') plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1]) plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1])
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
plt.ylabel("stat value")
plt.legend(loc="upper right") plt.legend(loc="upper right")
outfile = outputdir + "/ks_olddate_stat_" + d[0] + "_" + d[1] + ".png" outfile = outputdir + "/ks_olddate_stat_" + d[0] + "_" + d[1] + ".png"
plt.savefig(outfile, bbox_inches='tight') plt.savefig(outfile, bbox_inches='tight')

View File

@@ -8,6 +8,7 @@ from collections import defaultdict
from datetime import datetime from datetime import datetime
from mt import mt from mt import mt
import gc
TAG_RE = re.compile(r'<[^>]+>') TAG_RE = re.compile(r'<[^>]+>')
@@ -23,7 +24,9 @@ def cms(): return int(round(time.time() * 1000))
def load(folder): def load(folder):
users = readUsers(folder + "/Users.xml") users = readUsers(folder + "/Users.xml")
gc.collect()
posts = readPosts(folder + "/Posts.xml") posts = readPosts(folder + "/Posts.xml")
gc.collect()
# get first contribution to page: # get first contribution to page:
firstcontrib = computefirstcontrib(posts) firstcontrib = computefirstcontrib(posts)