wip
This commit is contained in:
@@ -6,6 +6,7 @@ from datetime import timedelta
|
|||||||
from math import ceil
|
from math import ceil
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK
|
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK
|
||||||
@@ -18,6 +19,11 @@ colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
|||||||
|
|
||||||
|
|
||||||
def main(folder, intervl):
|
def main(folder, intervl):
|
||||||
|
# with open(folder + "/output/batch/logi", "w") as f:
|
||||||
|
# f.write(str(readavgsentsingle(folder + "/output/batch/averagesentiment.txt")))
|
||||||
|
# return
|
||||||
|
matplotlib.use('Agg') # speed up saving of images
|
||||||
|
|
||||||
users, posts, firstcontrib, sumcontrib = load(folder)
|
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||||
|
|
||||||
intervals = calc_intervals(posts, intervl)
|
intervals = calc_intervals(posts, intervl)
|
||||||
@@ -36,6 +42,9 @@ def main(folder, intervl):
|
|||||||
magickold = IMAGE_MAGICK
|
magickold = IMAGE_MAGICK
|
||||||
magickglobal = IMAGE_MAGICK
|
magickglobal = IMAGE_MAGICK
|
||||||
|
|
||||||
|
avgsent = [[] for i in range(0, 5 + 1)]
|
||||||
|
avgsentsingle = [[] for i in range(0, 5 + 1)]
|
||||||
|
|
||||||
for (option_date_from, option_date_to) in intervals:
|
for (option_date_from, option_date_to) in intervals:
|
||||||
magickdate = IMAGE_MAGICK
|
magickdate = IMAGE_MAGICK
|
||||||
|
|
||||||
@@ -91,6 +100,9 @@ def main(folder, intervl):
|
|||||||
toxlevels.append(toxlevel)
|
toxlevels.append(toxlevel)
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
||||||
|
|
||||||
|
avgsent[option_posts].append(np.mean([s['compound'] for s in toxlevels]))
|
||||||
|
avgsentsingle[option_posts].append([s['compound'] for s in toxlevels])
|
||||||
|
|
||||||
outfilename = goutfilenamenewusers + "_" + str(option_posts)
|
outfilename = goutfilenamenewusers + "_" + str(option_posts)
|
||||||
dumptoxlevels(toxlevels, outfilename + ".py")
|
dumptoxlevels(toxlevels, outfilename + ".py")
|
||||||
|
|
||||||
@@ -119,9 +131,8 @@ def main(folder, intervl):
|
|||||||
axs[1, 1].set_yscale('log')
|
axs[1, 1].set_yscale('log')
|
||||||
|
|
||||||
# plt.show()
|
# plt.show()
|
||||||
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts within 1 week of 1st contribution\nPosts created between "
|
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " posts within 1 week of 1st contribution\nPosts created between "
|
||||||
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
|
+ option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels)))
|
||||||
# figsaver.save(fig, outfilename + ".png", bbox_inches='tight')
|
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
||||||
fig.savefig(outfilename + ".png", bbox_inches='tight')
|
fig.savefig(outfilename + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
@@ -146,9 +157,8 @@ def main(folder, intervl):
|
|||||||
gaxs[0, 1].set_yscale('log')
|
gaxs[0, 1].set_yscale('log')
|
||||||
gaxs[1, 1].set_yscale('log')
|
gaxs[1, 1].set_yscale('log')
|
||||||
gfig.suptitle(
|
gfig.suptitle(
|
||||||
"Sentiment of answers to the first X (max) posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime(
|
"Sentiment of answers to the first X posts within 1 week of 1st contribution\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime(
|
||||||
"%d-%m-%Y"))
|
"%d-%m-%Y"))
|
||||||
# figsaver.save(gfig, goutfilenamenewusers + ".png", bbox_inches='tight')
|
|
||||||
printnoln("\rglobal plot post ... plotting ... saving ...")
|
printnoln("\rglobal plot post ... plotting ... saving ...")
|
||||||
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
|
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
|
||||||
plt.close(gfig)
|
plt.close(gfig)
|
||||||
@@ -176,6 +186,8 @@ def main(folder, intervl):
|
|||||||
print("Sentiment not found for " + a['Id'])
|
print("Sentiment not found for " + a['Id'])
|
||||||
toxlevels.append(toxlevel)
|
toxlevels.append(toxlevel)
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
||||||
|
avgsent[0].append(np.mean([s['compound'] for s in toxlevels]))
|
||||||
|
avgsentsingle[0].append([s['compound'] for s in toxlevels])
|
||||||
|
|
||||||
dumptoxlevels(toxlevels, goutfilenameoldusers + ".py")
|
dumptoxlevels(toxlevels, goutfilenameoldusers + ".py")
|
||||||
|
|
||||||
@@ -198,9 +210,8 @@ def main(folder, intervl):
|
|||||||
axs[0, 1].set_yscale('log')
|
axs[0, 1].set_yscale('log')
|
||||||
axs[1, 1].set_yscale('log')
|
axs[1, 1].set_yscale('log')
|
||||||
|
|
||||||
# plt.show()
|
|
||||||
fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " +
|
fig.suptitle("Sentiment of answers to posts by most posting users (" + str(OLD_USER_PERCENTILE * 100) + "%tile)\nPosts created between " +
|
||||||
option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n=" + str(len(filteredposts)))
|
option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + ", n(q)=" + str(len(filteredposts)) + ", n(a)=" + str(len(toxlevels)))
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
||||||
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
|
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
@@ -212,6 +223,31 @@ def main(folder, intervl):
|
|||||||
for (i, cmd) in magickpost.items():
|
for (i, cmd) in magickpost.items():
|
||||||
os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf")
|
os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf")
|
||||||
|
|
||||||
|
# avg sentiment graph
|
||||||
|
print("Plotting average sentiments ...")
|
||||||
|
fig = plt.figure(figsize=(16, 12))
|
||||||
|
for i in postcounts:
|
||||||
|
plt.plot([iv[0] for iv in intervals], avgsent[i], label="new users (" + str(i) + " posts)")
|
||||||
|
plt.plot([iv[0] for iv in intervals], avgsent[0], label="old users (all posts)")
|
||||||
|
plt.title("Average sentiments")
|
||||||
|
plt.xticks(rotation=90)
|
||||||
|
plt.xlabel("time")
|
||||||
|
plt.ylabel("sentiment")
|
||||||
|
plt.legend(loc="upper right")
|
||||||
|
plt.savefig(outputdir + "/averagesentiment-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||||
|
plt.close(fig)
|
||||||
|
|
||||||
|
# dump avgsentsingle
|
||||||
|
dumpavgsentsingle(avgsentsingle, outputdir + "/averagesentiment.txt")
|
||||||
|
avgss2 = readavgsentsingle(outputdir + "/averagesentiment.txt")
|
||||||
|
if avgsentsingle != avgss2:
|
||||||
|
print("wuaaaaaa")
|
||||||
|
with open(outputdir + "/log", "w") as file:
|
||||||
|
file.write(str(avgsentsingle))
|
||||||
|
file.write(str(avgss2))
|
||||||
|
# print("1: " + str(avgsentsingle))
|
||||||
|
# print("2: " + str(avgss2))
|
||||||
|
|
||||||
|
|
||||||
def dumptoxlevels(lvls, filename):
|
def dumptoxlevels(lvls, filename):
|
||||||
with open(filename, "w") as file:
|
with open(filename, "w") as file:
|
||||||
@@ -219,6 +255,21 @@ def dumptoxlevels(lvls, filename):
|
|||||||
file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n")
|
file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def dumpavgsentsingle(avg, filename):
    """Serialize nested sentiment values to a plain-text file.

    ``avg`` is a sequence whose entries are lists of groups, each group
    being a list of values.  One line is written per entry, in the form
    ``<index>:<payload>``: values inside a group are converted with
    ``str`` and joined by ``;``, and groups are joined by ``;;``.  Lines
    are separated by a newline and no trailing newline is written.

    Note that an entry without any group and an entry holding one empty
    group both produce an empty payload.

    :param avg: nested lists (entry -> groups -> values) to write
    :param filename: path of the file to create or overwrite
    """
    with open(filename, "w") as file:
        rows = []
        for index, groups in enumerate(avg):
            encoded_groups = []
            for group in groups:
                encoded_groups.append(';'.join(map(str, group)))
            rows.append(str(index) + ':' + ';;'.join(encoded_groups))
        file.write('\n'.join(rows))
|
||||||
|
|
||||||
|
|
||||||
|
def readavgsentsingle(filename):
    """Parse a file of ``<index>:<payload>`` lines into nested float lists.

    Each non-blank line contributes one entry to the result, in file
    order; the index before the first ``:`` is ignored.  The payload is
    split on ``;;`` into groups and every non-empty group is split on
    ``;`` into floats.  An empty group decodes to ``[]``, so an empty
    payload decodes to a single empty group (``[[]]``).

    Blank lines (including the one produced by a trailing newline) are
    skipped, and an empty file yields ``[]`` instead of raising
    ``IndexError``.

    :param filename: path of the file to read
    :return: list (one item per line) of lists (groups) of floats
    :raises IndexError: if a non-blank line contains no ``:``
    :raises ValueError: if a value cannot be converted to ``float``
    """
    entries = []
    with open(filename, "r") as file:
        for raw_line in file:
            line = raw_line.rstrip('\n')
            if not line.strip():
                # Nothing to decode; an empty or newline-terminated file
                # would otherwise fail on the index lookup below.
                continue
            # Only the text between the first and second ':' is the payload.
            payload = line.split(':', 2)[1]
            groups = []
            for group in payload.split(';;'):
                if group == '':
                    groups.append([])
                else:
                    groups.append([float(value) for value in group.split(';')])
            entries.append(groups)
    return entries
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# execute only if run as a script
|
# execute only if run as a script
|
||||||
usage = sys.argv[0] + " <folder>"
|
usage = sys.argv[0] + " <folder>"
|
||||||
|
|||||||
Reference in New Issue
Block a user