This commit is contained in:
wea_ondara
2020-01-03 14:26:47 +01:00
parent e04da245ea
commit 296c0eb858

View File

@@ -6,6 +6,7 @@ from datetime import timedelta
from math import ceil
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK
@@ -18,6 +19,11 @@ colors = ['red', 'green', 'blue', 'orange', 'deeppink']
def main(folder, intervl):
# with open(folder + "/output/batch/logi", "w") as f:
# f.write(str(readavgsentsingle(folder + "/output/batch/averagesentiment.txt")))
# return
matplotlib.use('Agg') # speed up saving of images
users, posts, firstcontrib, sumcontrib = load(folder)
intervals = calc_intervals(posts, intervl)
@@ -36,6 +42,9 @@ def main(folder, intervl):
magickold = IMAGE_MAGICK
magickglobal = IMAGE_MAGICK
avgsent = [[] for i in range(0, 5 + 1)]
avgsentsingle = [[] for i in range(0, 5 + 1)]
for (option_date_from, option_date_to) in intervals:
magickdate = IMAGE_MAGICK
@@ -91,6 +100,9 @@ def main(folder, intervl):
toxlevels.append(toxlevel)
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
avgsent[option_posts].append(np.mean([s['compound'] for s in toxlevels]))
avgsentsingle[option_posts].append([s['compound'] for s in toxlevels])
outfilename = goutfilenamenewusers + "_" + str(option_posts)
dumptoxlevels(toxlevels, outfilename + ".py")
@@ -119,9 +131,8 @@ def main(folder, intervl):
axs[1, 1].set_yscale('log')
# plt.show()
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between "
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
# figsaver.save(fig, outfilename + ".png", bbox_inches='tight')
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " posts within 1 week of 1st contribution\nPosts created between "
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels)))
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
fig.savefig(outfilename + ".png", bbox_inches='tight')
plt.close(fig)
@@ -146,9 +157,8 @@ def main(folder, intervl):
gaxs[0, 1].set_yscale('log')
gaxs[1, 1].set_yscale('log')
gfig.suptitle(
"Sentiment of answers to the first X (max) posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime(
"Sentiment of answers to the first X posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime(
"%d-%m-%Y"))
# figsaver.save(gfig, goutfilenamenewusers + ".png", bbox_inches='tight')
printnoln("\rglobal plot post ... plotting ... saving ...")
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
plt.close(gfig)
@@ -176,6 +186,8 @@ def main(folder, intervl):
print("Sentiment not found for " + a['Id'])
toxlevels.append(toxlevel)
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
avgsent[0].append(np.mean([s['compound'] for s in toxlevels]))
avgsentsingle[0].append([s['compound'] for s in toxlevels])
dumptoxlevels(toxlevels, goutfilenameoldusers + ".py")
@@ -198,9 +210,8 @@ def main(folder, intervl):
axs[0, 1].set_yscale('log')
axs[1, 1].set_yscale('log')
# plt.show()
fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " +
option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels)))
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
plt.close(fig)
@@ -212,6 +223,31 @@ def main(folder, intervl):
for (i, cmd) in magickpost.items():
os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf")
# avg sentiment graph
print("Plotting average sentiments ...")
fig = plt.figure(figsize=(16, 12))
for i in postcounts:
plt.plot([iv[0] for iv in intervals], avgsent[i], label="new users (" + str(i) + " posts)")
plt.plot([iv[0] for iv in intervals], avgsent[0], label="old users (all posts)")
plt.title("Average sentiments")
plt.xticks(rotation=90)
plt.xlabel("time")
plt.ylabel("sentiment")
plt.legend(loc="upper right")
plt.savefig(outputdir + "/averagesentiment-i" + str(intervl) + ".png", bbox_inches='tight')
plt.close(fig)
# dump avgsentsingle
dumpavgsentsingle(avgsentsingle, outputdir + "/averagesentiment.txt")
avgss2 = readavgsentsingle(outputdir + "/averagesentiment.txt")
if avgsentsingle != avgss2:
print("wuaaaaaa")
with open(outputdir + "/log", "w") as file:
file.write(str(avgsentsingle))
file.write(str(avgss2))
# print("1: " + str(avgsentsingle))
# print("2: " + str(avgss2))
def dumptoxlevels(lvls, filename):
with open(filename, "w") as file:
@@ -219,6 +255,21 @@ def dumptoxlevels(lvls, filename):
file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n")
def dumpavgsentsingle(avg, filename):
    """Write the nested sentiment lists in *avg* to *filename* as text.

    One line is written per entry of *avg*, in the form ``<index>:<payload>``.
    Inside the payload the values of one inner list are joined with ``;`` and
    the inner lists themselves are joined with ``;;``. Lines are separated by
    a newline and no trailing newline is written.

    Note that an entry with no inner lists and an entry holding a single empty
    inner list both produce an empty payload.
    """
    with open(filename, "w") as file:
        rows = []
        for index, groups in enumerate(avg):
            # Serialise every inner list on its own before joining them.
            encoded_groups = []
            for group in groups:
                encoded_groups.append(';'.join(map(str, group)))
            rows.append(str(index) + ':' + ';;'.join(encoded_groups))
        file.write('\n'.join(rows))
def readavgsentsingle(filename):
    """Parse a file in the ``<index>:<payload>`` line format into nested lists.

    Each non-blank line contributes one entry to the result, in file order.
    The text between the first and second ``:`` is the payload (anything after
    a second ``:`` is ignored); it is split on ``;;`` into groups and every
    non-empty group is split on ``;`` into floats. An empty group becomes an
    empty list, so an empty payload yields ``[[]]``.

    Blank lines, including the one produced by a trailing newline, are
    skipped, and an empty file yields ``[]``.

    Returns:
        A list (one item per line) of lists (one per group) of floats.

    Raises:
        IndexError: if a non-blank line contains no ``:``.
        ValueError: if a value cannot be converted to float.
    """
    with open(filename, "r") as file:
        lines = file.read().split('\n')

    rows = []
    for line in lines:
        # A trailing newline or an empty file leaves an empty chunk here;
        # indexing its split result would fail, so skip such lines.
        if not line.strip():
            continue
        payload = line.split(':', 2)[1]
        rows.append([
            [float(x) for x in group.split(';')] if group != '' else []
            for group in payload.split(';;')
        ])
    return rows
if __name__ == "__main__":
# execute only if run as a script
usage = sys.argv[0] + " <folder>"