wip
This commit is contained in:
@@ -7,34 +7,33 @@ from math import ceil
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
from common import calc_intervals, imprt, FigSaver
|
||||
from common import calc_intervals, imprt, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK
|
||||
from loader import load, dmt, cms
|
||||
|
||||
printnoln = lambda text: print(text, end='', flush=True)
|
||||
rprint = lambda text: print('\r' + text)
|
||||
|
||||
DAYS_NEW_USER = 7
|
||||
OLD_USER_YEAR = 3
|
||||
OLD_USER_PERCENTILE = 0.95
|
||||
|
||||
analyser = SentimentIntensityAnalyzer()
|
||||
figsaver = FigSaver()
|
||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||
|
||||
|
||||
def main(folder):
|
||||
def main(folder, intervl):
|
||||
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||
|
||||
intervals = calc_intervals(posts)
|
||||
intervals = calc_intervals(posts, intervl)
|
||||
cachedsentiments = imprt(folder + "/output/sentiments.py").answers
|
||||
|
||||
outfolder = folder + "/output/batch/"
|
||||
os.system("mkdir -p " + outfolder)
|
||||
outputdir = folder + "/output/batch/"
|
||||
os.system("mkdir -p " + outputdir)
|
||||
|
||||
postcounts = range(1, 5 + 1)
|
||||
|
||||
magickpost = {i: IMAGE_MAGICK for i in postcounts}
|
||||
magickold = IMAGE_MAGICK
|
||||
magickglobal = IMAGE_MAGICK
|
||||
|
||||
for (option_date_from, option_date_to) in intervals:
|
||||
magickdate = IMAGE_MAGICK
|
||||
|
||||
# get questions for option_date_from <= creation date < option_date_to
|
||||
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filter posts by dates").getresults()
|
||||
if len(newposts) == 0:
|
||||
@@ -51,8 +50,8 @@ def main(folder):
|
||||
gpos = []
|
||||
gcom = []
|
||||
|
||||
goutfilenamenewusers = outfolder + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||
goutfilenameoldusers = outfolder + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||
goutfilenamenewusers = outputdir + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||
goutfilenameoldusers = outputdir + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||
|
||||
for option_posts in postcounts:
|
||||
# print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + " - #posts: " + str(option_posts))
|
||||
@@ -122,6 +121,9 @@ def main(folder):
|
||||
fig.savefig(outfilename + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
||||
magickpost[option_posts] += " " + outfilename + ".png"
|
||||
magickdate += " " + outfilename + ".png"
|
||||
os.system(magickdate + " " + goutfilenamenewusers + ".pdf")
|
||||
|
||||
# global
|
||||
start = cms()
|
||||
@@ -146,6 +148,7 @@ def main(folder):
|
||||
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
|
||||
plt.close(gfig)
|
||||
rprint("global plot post ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
||||
magickglobal += " " + goutfilenamenewusers + ".png"
|
||||
|
||||
# for old users ---------------------------------------------------------------------------------
|
||||
start = cms()
|
||||
@@ -192,18 +195,16 @@ def main(folder):
|
||||
|
||||
# plt.show()
|
||||
fig.suptitle("Sentiment of answers to posts by most posting users (95%tile)\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
||||
# figsaver.save(fig, goutfilenameoldusers + ".png", bbox_inches='tight')
|
||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
||||
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
||||
magickold += " " + goutfilenameoldusers + ".png"
|
||||
|
||||
figsaver.join()
|
||||
figsaver.join()
|
||||
|
||||
|
||||
def computeToxLevel(text):
|
||||
return analyser.polarity_scores(text)
|
||||
os.system(magickglobal + " batch_newusers.pdf")
|
||||
os.system(magickold + " batch_oldusers.pdf")
|
||||
for (i, cmd) in magickpost.items():
|
||||
os.system(cmd + " " + "batch_newusers_" + i + ".pdf")
|
||||
|
||||
|
||||
def dumptoxlevels(lvls, filename):
|
||||
@@ -222,5 +223,20 @@ if __name__ == "__main__":
|
||||
if not os.path.isdir(folder):
|
||||
print(folder + " is not a folder")
|
||||
sys.exit(1)
|
||||
interval = 3
|
||||
if len(sys.argv) >= 3:
|
||||
if sys.argv[2].startswith("-i"):
|
||||
interval = sys.argv[2][2:]
|
||||
try:
|
||||
interval = int(interval)
|
||||
except ValueError:
|
||||
print("-i: int required")
|
||||
sys.exit(1)
|
||||
if interval < 1 or interval > 12:
|
||||
print("-i: only 1 - 12")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("unknown parameter: " + sys.argv[2])
|
||||
sys.exit(1)
|
||||
|
||||
main(folder)
|
||||
main(folder, interval)
|
||||
|
||||
Reference in New Issue
Block a user