diff --git a/analyze_batch.py b/analyze_batch.py index d63eb47..933b40f 100644 --- a/analyze_batch.py +++ b/analyze_batch.py @@ -6,6 +6,7 @@ from datetime import timedelta from math import ceil import matplotlib.pyplot as plt +import matplotlib import numpy as np from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK @@ -18,6 +19,11 @@ colors = ['red', 'green', 'blue', 'orange', 'deeppink'] def main(folder, intervl): + # with open(folder + "/output/batch/logi", "w") as f: + # f.write(str(readavgsentsingle(folder + "/output/batch/averagesentiment.txt"))) + # return + matplotlib.use('Agg') # speed up saving of images + users, posts, firstcontrib, sumcontrib = load(folder) intervals = calc_intervals(posts, intervl) @@ -36,6 +42,9 @@ def main(folder, intervl): magickold = IMAGE_MAGICK magickglobal = IMAGE_MAGICK + avgsent = [[] for i in range(0, 5 + 1)] + avgsentsingle = [[] for i in range(0, 5 + 1)] + for (option_date_from, option_date_to) in intervals: magickdate = IMAGE_MAGICK @@ -91,6 +100,9 @@ def main(folder, intervl): toxlevels.append(toxlevel) printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...") + avgsent[option_posts].append(np.mean([s['compound'] for s in toxlevels])) + avgsentsingle[option_posts].append([s['compound'] for s in toxlevels]) + outfilename = goutfilenamenewusers + "_" + str(option_posts) dumptoxlevels(toxlevels, outfilename + ".py") @@ -119,9 +131,8 @@ def main(folder, intervl): axs[1, 1].set_yscale('log') # plt.show() - fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between " - + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts))) - # figsaver.save(fig, outfilename + ".png", bbox_inches='tight') + fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " posts within 1 week of 1st contribution\nPosts created between " + + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels))) printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...") fig.savefig(outfilename + ".png", bbox_inches='tight') plt.close(fig) @@ -146,9 +157,8 @@ def main(folder, intervl): gaxs[0, 1].set_yscale('log') gaxs[1, 1].set_yscale('log') gfig.suptitle( - "Sentiment of answers to the first X (max) posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime( + "Sentiment of answers to the first X posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime( "%d-%m-%Y")) - # figsaver.save(gfig, goutfilenamenewusers + ".png", bbox_inches='tight') printnoln("\rglobal plot post ... plotting ... saving ...") gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight') plt.close(gfig) @@ -176,6 +186,8 @@ def main(folder, intervl): print("Sentiment not found for " + a['Id']) toxlevels.append(toxlevel) printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...") + avgsent[0].append(np.mean([s['compound'] for s in toxlevels])) + avgsentsingle[0].append([s['compound'] for s in toxlevels]) dumptoxlevels(toxlevels, goutfilenameoldusers + ".py") @@ -198,9 +210,8 @@ def main(folder, intervl): axs[0, 1].set_yscale('log') axs[1, 1].set_yscale('log') - # plt.show() fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " + - option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts))) + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels))) printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...") fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight') plt.close(fig) @@ -212,6 +223,31 @@ def main(folder, intervl): for (i, cmd) in magickpost.items(): os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf") + # avg sentiment graph + print("Plotting average sentiments ...") + fig = plt.figure(figsize=(16, 12)) + for i in postcounts: + plt.plot([iv[0] for iv in intervals], avgsent[i], label="new users (" + str(i) + " posts)") + plt.plot([iv[0] for iv in intervals], avgsent[0], label="old users (all posts)") + plt.title("Average sentiments") + plt.xticks(rotation=90) + plt.xlabel("time") + plt.ylabel("sentiment") + plt.legend(loc="upper right") + plt.savefig(outputdir + "/averagesentiment-i" + str(intervl) + ".png", bbox_inches='tight') + plt.close(fig) + + # dump avgsentsingle + dumpavgsentsingle(avgsentsingle, outputdir + "/averagesentiment.txt") + avgss2 = readavgsentsingle(outputdir + "/averagesentiment.txt") + if avgsentsingle != avgss2: + print("wuaaaaaa") + with open(outputdir + "/log", "w") as file: + file.write(str(avgsentsingle)) + file.write(str(avgss2)) + # print("1: " + str(avgsentsingle)) + # print("2: " + str(avgss2)) + def dumptoxlevels(lvls, filename): with open(filename, "w") as file: @@ -219,6 +255,21 @@ def dumptoxlevels(lvls, filename): file.write("toxlevels = " + str(lvls).replace("", "list", 1) + "\n") +def dumpavgsentsingle(avg, filename): + with open(filename, "w") as file: + s = '\n'.join([str(i) + ':' + ';;'.join([';'.join([str(x) for x in a]) for a in avg[i]]) for i in range(len(avg))]) + file.write(s) + + +def readavgsentsingle(filename): + with open(filename, "r") as file: + s = file.read() + s = s.split('\n') + s = [l.split(':', 2)[1] for l in s] + s = [[[float(x) for x in a.split(';')] if a != '' else [] for a in l.split(';;')] for l in s] + return s + + if __name__ == "__main__": # execute only if run as a script usage = sys.argv[0] + " "