This commit is contained in:
wea_ondara
2020-01-03 14:26:47 +01:00
parent e04da245ea
commit 296c0eb858

View File

@@ -6,6 +6,7 @@ from datetime import timedelta
from math import ceil from math import ceil
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib
import numpy as np import numpy as np
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK
@@ -18,6 +19,11 @@ colors = ['red', 'green', 'blue', 'orange', 'deeppink']
def main(folder, intervl): def main(folder, intervl):
# with open(folder + "/output/batch/logi", "w") as f:
# f.write(str(readavgsentsingle(folder + "/output/batch/averagesentiment.txt")))
# return
matplotlib.use('Agg') # speed up saving of images
users, posts, firstcontrib, sumcontrib = load(folder) users, posts, firstcontrib, sumcontrib = load(folder)
intervals = calc_intervals(posts, intervl) intervals = calc_intervals(posts, intervl)
@@ -36,6 +42,9 @@ def main(folder, intervl):
magickold = IMAGE_MAGICK magickold = IMAGE_MAGICK
magickglobal = IMAGE_MAGICK magickglobal = IMAGE_MAGICK
avgsent = [[] for i in range(0, 5 + 1)]
avgsentsingle = [[] for i in range(0, 5 + 1)]
for (option_date_from, option_date_to) in intervals: for (option_date_from, option_date_to) in intervals:
magickdate = IMAGE_MAGICK magickdate = IMAGE_MAGICK
@@ -91,6 +100,9 @@ def main(folder, intervl):
toxlevels.append(toxlevel) toxlevels.append(toxlevel)
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...") printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
avgsent[option_posts].append(np.mean([s['compound'] for s in toxlevels]))
avgsentsingle[option_posts].append([s['compound'] for s in toxlevels])
outfilename = goutfilenamenewusers + "_" + str(option_posts) outfilename = goutfilenamenewusers + "_" + str(option_posts)
dumptoxlevels(toxlevels, outfilename + ".py") dumptoxlevels(toxlevels, outfilename + ".py")
@@ -119,9 +131,8 @@ def main(folder, intervl):
axs[1, 1].set_yscale('log') axs[1, 1].set_yscale('log')
# plt.show() # plt.show()
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between " fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " posts within 1 week of 1st contribution\nPosts created between "
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts))) + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels)))
# figsaver.save(fig, outfilename + ".png", bbox_inches='tight')
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...") printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
fig.savefig(outfilename + ".png", bbox_inches='tight') fig.savefig(outfilename + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
@@ -146,9 +157,8 @@ def main(folder, intervl):
gaxs[0, 1].set_yscale('log') gaxs[0, 1].set_yscale('log')
gaxs[1, 1].set_yscale('log') gaxs[1, 1].set_yscale('log')
gfig.suptitle( gfig.suptitle(
"Sentiment of answers to the first X (max) posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime( "Sentiment of answers to the first X posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime(
"%d-%m-%Y")) "%d-%m-%Y"))
# figsaver.save(gfig, goutfilenamenewusers + ".png", bbox_inches='tight')
printnoln("\rglobal plot post ... plotting ... saving ...") printnoln("\rglobal plot post ... plotting ... saving ...")
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight') gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
plt.close(gfig) plt.close(gfig)
@@ -176,6 +186,8 @@ def main(folder, intervl):
print("Sentiment not found for " + a['Id']) print("Sentiment not found for " + a['Id'])
toxlevels.append(toxlevel) toxlevels.append(toxlevel)
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...") printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
avgsent[0].append(np.mean([s['compound'] for s in toxlevels]))
avgsentsingle[0].append([s['compound'] for s in toxlevels])
dumptoxlevels(toxlevels, goutfilenameoldusers + ".py") dumptoxlevels(toxlevels, goutfilenameoldusers + ".py")
@@ -198,9 +210,8 @@ def main(folder, intervl):
axs[0, 1].set_yscale('log') axs[0, 1].set_yscale('log')
axs[1, 1].set_yscale('log') axs[1, 1].set_yscale('log')
# plt.show()
fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " + fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " +
option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts))) option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels)))
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...") printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight') fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
@@ -212,6 +223,31 @@ def main(folder, intervl):
for (i, cmd) in magickpost.items(): for (i, cmd) in magickpost.items():
os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf") os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf")
# avg sentiment graph
print("Plotting average sentiments ...")
fig = plt.figure(figsize=(16, 12))
for i in postcounts:
plt.plot([iv[0] for iv in intervals], avgsent[i], label="new users (" + str(i) + " posts)")
plt.plot([iv[0] for iv in intervals], avgsent[0], label="old users (all posts)")
plt.title("Average sentiments")
plt.xticks(rotation=90)
plt.xlabel("time")
plt.ylabel("sentiment")
plt.legend(loc="upper right")
plt.savefig(outputdir + "/averagesentiment-i" + str(intervl) + ".png", bbox_inches='tight')
plt.close(fig)
# dump avgsentsingle
dumpavgsentsingle(avgsentsingle, outputdir + "/averagesentiment.txt")
avgss2 = readavgsentsingle(outputdir + "/averagesentiment.txt")
if avgsentsingle != avgss2:
print("wuaaaaaa")
with open(outputdir + "/log", "w") as file:
file.write(str(avgsentsingle))
file.write(str(avgss2))
# print("1: " + str(avgsentsingle))
# print("2: " + str(avgss2))
def dumptoxlevels(lvls, filename): def dumptoxlevels(lvls, filename):
with open(filename, "w") as file: with open(filename, "w") as file:
@@ -219,6 +255,21 @@ def dumptoxlevels(lvls, filename):
file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n") file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n")
def dumpavgsentsingle(avg, filename):
    """Write the nested sentiment lists in *avg* to *filename* as text.

    One line is produced per index ``i`` of ``avg`` in the form
    ``i:<groups>``. The inner lists of ``avg[i]`` are separated by ``;;``
    and the values inside each inner list (converted with ``str``) are
    separated by ``;``. Lines are joined with a newline; no trailing
    newline is written.
    """
    with open(filename, "w") as file:
        rows = []
        for idx in range(len(avg)):
            groups = []
            for values in avg[idx]:
                groups.append(';'.join(map(str, values)))
            rows.append(str(idx) + ':' + ';;'.join(groups))
        file.write('\n'.join(rows))
def readavgsentsingle(filename):
    """Read back a file written in the ``i:<groups>`` text format.

    Each non-blank line is expected to look like ``i:a;b;;c;d``: an index,
    a colon, then groups separated by ``;;`` whose values are separated by
    ``;``. The index itself is ignored; lines are returned in file order.

    Returns a list (one entry per line) of lists (one per group) of floats.
    An empty group is returned as ``[]``.

    Blank lines (for example the one produced by a trailing newline, or an
    entirely empty file) are skipped instead of raising ``IndexError``.

    NOTE: a line with an empty body (``"3:"``) is read as one empty group,
    i.e. ``[[]]``. The text format cannot distinguish that from an index
    with no groups at all (``[]``), so such data does not round-trip.

    Raises ``IndexError`` for a non-blank line without a colon and
    ``ValueError`` for a value that is not a valid float.
    """
    with open(filename, "r") as file:
        lines = file.read().split('\n')

    result = []
    for line in lines:
        if not line.strip():
            # Nothing to parse; avoids IndexError on trailing newlines.
            continue
        body = line.split(':', 2)[1]
        groups = []
        for group in body.split(';;'):
            if group != '':
                groups.append([float(x) for x in group.split(';')])
            else:
                groups.append([])
        result.append(groups)
    return result
if __name__ == "__main__": if __name__ == "__main__":
# execute only if run as a script # execute only if run as a script
usage = sys.argv[0] + " <folder>" usage = sys.argv[0] + " <folder>"