This commit is contained in:
wea_ondara
2019-08-11 16:47:52 +02:00
parent aacf71fad8
commit 0536f5db5f
5 changed files with 98 additions and 89 deletions

View File

@@ -7,34 +7,33 @@ from math import ceil
import matplotlib.pyplot as plt
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from common import calc_intervals, imprt, FigSaver
from common import calc_intervals, imprt, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK
from loader import load, dmt, cms
printnoln = lambda text: print(text, end='', flush=True)
rprint = lambda text: print('\r' + text)
DAYS_NEW_USER = 7
OLD_USER_YEAR = 3
OLD_USER_PERCENTILE = 0.95
analyser = SentimentIntensityAnalyzer()
figsaver = FigSaver()
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
def main(folder):
def main(folder, intervl):
users, posts, firstcontrib, sumcontrib = load(folder)
intervals = calc_intervals(posts)
intervals = calc_intervals(posts, intervl)
cachedsentiments = imprt(folder + "/output/sentiments.py").answers
outfolder = folder + "/output/batch/"
os.system("mkdir -p " + outfolder)
outputdir = folder + "/output/batch/"
os.system("mkdir -p " + outputdir)
postcounts = range(1, 5 + 1)
magickpost = {i: IMAGE_MAGICK for i in postcounts}
magickold = IMAGE_MAGICK
magickglobal = IMAGE_MAGICK
for (option_date_from, option_date_to) in intervals:
magickdate = IMAGE_MAGICK
# get questions for option_date_from <= creation date < option_date_to
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filter posts by dates").getresults()
if len(newposts) == 0:
@@ -51,8 +50,8 @@ def main(folder):
gpos = []
gcom = []
goutfilenamenewusers = outfolder + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
goutfilenameoldusers = outfolder + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
goutfilenamenewusers = outputdir + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
goutfilenameoldusers = outputdir + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
for option_posts in postcounts:
# print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + " - #posts: " + str(option_posts))
@@ -122,6 +121,9 @@ def main(folder):
fig.savefig(outfilename + ".png", bbox_inches='tight')
plt.close(fig)
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
magickpost[option_posts] += " " + outfilename + ".png"
magickdate += " " + outfilename + ".png"
os.system(magickdate + " " + goutfilenamenewusers + ".pdf")
# global
start = cms()
@@ -146,6 +148,7 @@ def main(folder):
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
plt.close(gfig)
rprint("global plot post ... plotting ... saving ... took " + str(cms() - start) + "ms")
magickglobal += " " + goutfilenamenewusers + ".png"
# for old users ---------------------------------------------------------------------------------
start = cms()
@@ -192,18 +195,16 @@ def main(folder):
# plt.show()
fig.suptitle("Sentiment of answers to posts by most posting users (95%tile)\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
# figsaver.save(fig, goutfilenameoldusers + ".png", bbox_inches='tight')
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
plt.close(fig)
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
magickold += " " + goutfilenameoldusers + ".png"
figsaver.join()
figsaver.join()
def computeToxLevel(text):
    """Score ``text`` with the module-level VADER ``analyser``.

    Returns whatever ``analyser.polarity_scores`` produces for ``text``,
    unmodified.
    """
    scores = analyser.polarity_scores(text)
    return scores
# Merge the collected PNG plots into summary PDFs. Each magick* string starts
# as IMAGE_MAGICK (presumably an ImageMagick command prefix -- confirm in
# common) and has the PNG paths appended to it while the plots are saved.
os.system(magickglobal + " batch_newusers.pdf")
os.system(magickold + " batch_oldusers.pdf")
for i, cmd in magickpost.items():
    # magickpost is keyed by the ints in postcounts, so the key must be
    # converted explicitly; "str" + int raises TypeError.
    os.system(cmd + " " + "batch_newusers_" + str(i) + ".pdf")
def dumptoxlevels(lvls, filename):
@@ -222,5 +223,20 @@ if __name__ == "__main__":
if not os.path.isdir(folder):
print(folder + " is not a folder")
sys.exit(1)
interval = 3
if len(sys.argv) >= 3:
if sys.argv[2].startswith("-i"):
interval = sys.argv[2][2:]
try:
interval = int(interval)
except ValueError:
print("-i: int required")
sys.exit(1)
if interval < 1 or interval > 12:
print("-i: only 1 - 12")
sys.exit(1)
else:
print("unknown parameter: " + sys.argv[2])
sys.exit(1)
main(folder)
main(folder, interval)