wip
This commit is contained in:
@@ -20,7 +20,11 @@ def main(folder, intervl):
|
|||||||
users, posts, firstcontrib, sumcontrib = load(folder)
|
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||||
|
|
||||||
intervals = calc_intervals(posts, intervl)
|
intervals = calc_intervals(posts, intervl)
|
||||||
|
|
||||||
|
start = cms()
|
||||||
|
printnoln("reading sentiments ...")
|
||||||
cachedsentiments = imprt(folder + "/output/sentiments.py").answers
|
cachedsentiments = imprt(folder + "/output/sentiments.py").answers
|
||||||
|
rprint("reading sentiments ... took " + str(cms() - start) + "ms")
|
||||||
|
|
||||||
outputdir = folder + "/output/batch/"
|
outputdir = folder + "/output/batch/"
|
||||||
os.system("mkdir -p " + outputdir)
|
os.system("mkdir -p " + outputdir)
|
||||||
@@ -100,9 +104,9 @@ def main(folder, intervl):
|
|||||||
gcom.append(comlevelsflat)
|
gcom.append(comlevelsflat)
|
||||||
|
|
||||||
fig, axs = plt.subplots(2, 2, figsize=(16, 12))
|
fig, axs = plt.subplots(2, 2, figsize=(16, 12))
|
||||||
axs[0, 0].set_title('Neg')
|
axs[0, 0].set_title('Negativity')
|
||||||
axs[1, 0].set_title('Neu')
|
axs[1, 0].set_title('Neutrality')
|
||||||
axs[0, 1].set_title('Pos')
|
axs[0, 1].set_title('Positivity')
|
||||||
axs[1, 1].set_title('Compound')
|
axs[1, 1].set_title('Compound')
|
||||||
axs[0, 0].hist(neglevelsflat, np.linspace(0, 1, 1 * 100))
|
axs[0, 0].hist(neglevelsflat, np.linspace(0, 1, 1 * 100))
|
||||||
axs[1, 0].hist(neulevelsflat, np.linspace(0, 1, 1 * 100))
|
axs[1, 0].hist(neulevelsflat, np.linspace(0, 1, 1 * 100))
|
||||||
@@ -115,7 +119,7 @@ def main(folder, intervl):
|
|||||||
|
|
||||||
# plt.show()
|
# plt.show()
|
||||||
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between "
|
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between "
|
||||||
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
|
||||||
# figsaver.save(fig, outfilename + ".png", bbox_inches='tight')
|
# figsaver.save(fig, outfilename + ".png", bbox_inches='tight')
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
||||||
fig.savefig(outfilename + ".png", bbox_inches='tight')
|
fig.savefig(outfilename + ".png", bbox_inches='tight')
|
||||||
@@ -194,7 +198,8 @@ def main(folder, intervl):
|
|||||||
axs[1, 1].set_yscale('log')
|
axs[1, 1].set_yscale('log')
|
||||||
|
|
||||||
# plt.show()
|
# plt.show()
|
||||||
fig.suptitle("Sentiment of answers to posts by most posting users (95%tile)\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " +
|
||||||
|
option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
||||||
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
|
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|||||||
@@ -80,6 +80,7 @@ def plotbypost(onlyfiles, outputdir):
|
|||||||
f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[p][i]) + "; ks neu = " + str(changes_neu[p][i])
|
f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[p][i]) + "; ks neu = " + str(changes_neu[p][i])
|
||||||
+ "; ks pos = " + str(changes_pos[p][i]) + "; ks com = " + str(changes_com[p][i]) + "\n")
|
+ "; ks pos = " + str(changes_pos[p][i]) + "; ks com = " + str(changes_com[p][i]) + "\n")
|
||||||
# pval
|
# pval
|
||||||
|
magick = IMAGE_MAGICK
|
||||||
for (p, l) in files.items():
|
for (p, l) in files.items():
|
||||||
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
||||||
fig = plt.figure(figsize=(16, 12))
|
fig = plt.figure(figsize=(16, 12))
|
||||||
@@ -93,11 +94,16 @@ def plotbypost(onlyfiles, outputdir):
|
|||||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||||
plt.title("KS 2-sided test with max " + str(p) + " posts")
|
plt.title("KS 2-sided test with max " + str(p) + " posts")
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
|
plt.xlabel("Comparision: time frame X - time frame X+1")
|
||||||
|
plt.ylabel("p-value")
|
||||||
plt.legend(loc="upper right")
|
plt.legend(loc="upper right")
|
||||||
plt.savefig(outputdir + "/ks_post_pval_" + str(p) + ".png", bbox_inches='tight')
|
plt.savefig(outputdir + "/ks_post_pval_" + str(p) + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
magick += " " + outputdir + "/ks_post_pval_" + str(p) + ".png"
|
||||||
|
os.system(magick + " " + outputdir + "/ks_post_pval.pdf")
|
||||||
|
|
||||||
# stat
|
# stat
|
||||||
|
magick = IMAGE_MAGICK
|
||||||
for (p, l) in files.items():
|
for (p, l) in files.items():
|
||||||
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
||||||
fig = plt.figure(figsize=(16, 12))
|
fig = plt.figure(figsize=(16, 12))
|
||||||
@@ -111,9 +117,13 @@ def plotbypost(onlyfiles, outputdir):
|
|||||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||||
plt.title("KS 2-sided test with max " + str(p) + " posts")
|
plt.title("KS 2-sided test with max " + str(p) + " posts")
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
|
plt.xlabel("Comparision: time frame X - time frame X+1")
|
||||||
|
plt.ylabel("stat value")
|
||||||
plt.legend(loc="upper right")
|
plt.legend(loc="upper right")
|
||||||
plt.savefig(outputdir + "/ks_post_stat_" + str(p) + ".png", bbox_inches='tight')
|
plt.savefig(outputdir + "/ks_post_stat_" + str(p) + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
magick += " " + outputdir + "/ks_post_stat_" + str(p) + ".png"
|
||||||
|
os.system(magick + " " + outputdir + "/ks_post_stat.pdf")
|
||||||
|
|
||||||
|
|
||||||
def plotbydate(onlyfiles, outputdir):
|
def plotbydate(onlyfiles, outputdir):
|
||||||
@@ -122,6 +132,8 @@ def plotbydate(onlyfiles, outputdir):
|
|||||||
for f in onlyfiles:
|
for f in onlyfiles:
|
||||||
s = f[:-3].split("_")
|
s = f[:-3].split("_")
|
||||||
files[(s[3], s[4])].append(f)
|
files[(s[3], s[4])].append(f)
|
||||||
|
dates = sorted(files.keys(), key=lambda e: "-".join(reversed(e[0].split("-"))))
|
||||||
|
files = {d: files[d] for d in dates}
|
||||||
files = {d: sorted(l, key=lambda e: e.split("_")[5]) for (d, l) in files.items()}
|
files = {d: sorted(l, key=lambda e: e.split("_")[5]) for (d, l) in files.items()}
|
||||||
|
|
||||||
changes_neg = defaultdict(list)
|
changes_neg = defaultdict(list)
|
||||||
@@ -166,6 +178,7 @@ def plotbydate(onlyfiles, outputdir):
|
|||||||
+ "; ks pos = " + str(changes_pos[d][i]) + "; ks com = " + str(changes_com[d][i]) + "\n")
|
+ "; ks pos = " + str(changes_pos[d][i]) + "; ks com = " + str(changes_com[d][i]) + "\n")
|
||||||
|
|
||||||
# pval
|
# pval
|
||||||
|
magick = IMAGE_MAGICK
|
||||||
for (d, l) in files.items():
|
for (d, l) in files.items():
|
||||||
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
||||||
fig = plt.figure(figsize=(16, 12))
|
fig = plt.figure(figsize=(16, 12))
|
||||||
@@ -179,11 +192,16 @@ def plotbydate(onlyfiles, outputdir):
|
|||||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||||
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
|
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
|
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
|
||||||
|
plt.ylabel("p-value")
|
||||||
plt.legend(loc="upper right")
|
plt.legend(loc="upper right")
|
||||||
plt.savefig(outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
plt.savefig(outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
magick += " " + outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png"
|
||||||
|
os.system(magick + " " + outputdir + "/ks_date_pval.pdf")
|
||||||
|
|
||||||
# stat
|
# stat
|
||||||
|
magick = IMAGE_MAGICK
|
||||||
for (d, l) in files.items():
|
for (d, l) in files.items():
|
||||||
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
||||||
fig = plt.figure(figsize=(16, 12))
|
fig = plt.figure(figsize=(16, 12))
|
||||||
@@ -197,9 +215,13 @@ def plotbydate(onlyfiles, outputdir):
|
|||||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||||
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
|
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
|
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
|
||||||
|
plt.ylabel("stat value")
|
||||||
plt.legend(loc="upper right")
|
plt.legend(loc="upper right")
|
||||||
plt.savefig(outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
plt.savefig(outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
magick += " " + outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png"
|
||||||
|
os.system(magick + " " + outputdir + "/ks_date_stat.pdf")
|
||||||
|
|
||||||
|
|
||||||
def plotbydateold(onlyfiles, oldfiles, outputdir):
|
def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||||
@@ -253,11 +275,6 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
|||||||
|
|
||||||
print("logs")
|
print("logs")
|
||||||
for (d, l) in files.items():
|
for (d, l) in files.items():
|
||||||
# print(d)
|
|
||||||
# print("neg is: " + str(len(changes_neg[d])) + " should: " + str(len(l)))
|
|
||||||
# print("neu is: " + str(len(changes_neu[d])) + " should: " + str(len(l)))
|
|
||||||
# print("pos is: " + str(len(changes_pos[d])) + " should: " + str(len(l)))
|
|
||||||
# print("com is: " + str(len(changes_com[d])) + " should: " + str(len(l)))
|
|
||||||
f1 = oldfiles[d]
|
f1 = oldfiles[d]
|
||||||
with open(outputdir + "/ks_olddate_" + d[0] + "_" + d[1] + ".log", "w") as f:
|
with open(outputdir + "/ks_olddate_" + d[0] + "_" + d[1] + ".log", "w") as f:
|
||||||
for i in range(len(l)):
|
for i in range(len(l)):
|
||||||
@@ -286,6 +303,8 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
|||||||
plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o')
|
plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o')
|
||||||
plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1])
|
plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1])
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
|
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
|
||||||
|
plt.ylabel("p-value")
|
||||||
plt.legend(loc="upper right")
|
plt.legend(loc="upper right")
|
||||||
outfile = outputdir + "/ks_olddate_pval_" + d[0] + "_" + d[1] + ".png"
|
outfile = outputdir + "/ks_olddate_pval_" + d[0] + "_" + d[1] + ".png"
|
||||||
plt.savefig(outfile, bbox_inches='tight')
|
plt.savefig(outfile, bbox_inches='tight')
|
||||||
@@ -312,6 +331,8 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
|||||||
plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o')
|
plt.plot([dx[0] for dx in dev], [dx[1] for dx in dev], color=colors[type], ls='None', marker='o')
|
||||||
plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1])
|
plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1])
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
|
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
|
||||||
|
plt.ylabel("stat value")
|
||||||
plt.legend(loc="upper right")
|
plt.legend(loc="upper right")
|
||||||
outfile = outputdir + "/ks_olddate_stat_" + d[0] + "_" + d[1] + ".png"
|
outfile = outputdir + "/ks_olddate_stat_" + d[0] + "_" + d[1] + ".png"
|
||||||
plt.savefig(outfile, bbox_inches='tight')
|
plt.savefig(outfile, bbox_inches='tight')
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from collections import defaultdict
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from mt import mt
|
from mt import mt
|
||||||
|
import gc
|
||||||
|
|
||||||
TAG_RE = re.compile(r'<[^>]+>')
|
TAG_RE = re.compile(r'<[^>]+>')
|
||||||
|
|
||||||
@@ -23,7 +24,9 @@ def cms(): return int(round(time.time() * 1000))
|
|||||||
|
|
||||||
def load(folder):
|
def load(folder):
|
||||||
users = readUsers(folder + "/Users.xml")
|
users = readUsers(folder + "/Users.xml")
|
||||||
|
gc.collect()
|
||||||
posts = readPosts(folder + "/Posts.xml")
|
posts = readPosts(folder + "/Posts.xml")
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
# get first contribution to page:
|
# get first contribution to page:
|
||||||
firstcontrib = computefirstcontrib(posts)
|
firstcontrib = computefirstcontrib(posts)
|
||||||
|
|||||||
Reference in New Issue
Block a user