wip
This commit is contained in:
54
posthist.py
54
posthist.py
@@ -1,12 +1,14 @@
|
||||
import os
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from datetime import timedelta
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
|
||||
from common import calc_intervals, IMAGE_MAGICK
|
||||
from common import calc_intervals, IMAGE_MAGICK, DAYS_NEW_USER
|
||||
from loader import load, dmt
|
||||
from sentiments import readtoxleveltxt
|
||||
|
||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||
|
||||
@@ -14,15 +16,19 @@ colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||
def main(folder, intervl):
|
||||
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||
intervals = calc_intervals(posts, intervl)
|
||||
(_, cachedsentiments) = readtoxleveltxt(folder + "/output/sentiments.txt")
|
||||
|
||||
outputdir = folder + "/output/posthist/"
|
||||
os.system("mkdir -p " + outputdir)
|
||||
|
||||
activeusercounts = []
|
||||
answerstonewusers = []
|
||||
sentimentstonewusers = []
|
||||
imgmagickcmd = IMAGE_MAGICK
|
||||
for (option_date_from, option_date_to) in intervals:
|
||||
print((option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y")))
|
||||
print(option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y"))
|
||||
|
||||
# post histograms
|
||||
# filter posts by option_date_from <= creation date < option_date_to (end of interval is exclusive)
|
||||
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
|
||||
|
||||
@@ -34,7 +40,7 @@ def main(folder, intervl):
|
||||
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
|
||||
activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys())))
|
||||
|
||||
histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||
histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") + "-i" + str(intervl)
|
||||
|
||||
histdata = [pc for pc in postcounts.values()]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
@@ -48,14 +54,50 @@ def main(folder, intervl):
|
||||
fig.savefig(histfilename + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
imgmagickcmd += " " + histfilename + ".png"
|
||||
os.system(imgmagickcmd + " " + outputdir + "/posthist.pdf")
|
||||
|
||||
# answers to new users
|
||||
answers = (dmt(posts).map(lambda q: [a for a in q['Answers'] if option_date_from <= a['CreationDate'] < option_date_to
|
||||
and firstcontrib[q['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= a['CreationDate']])
|
||||
.getresults())
|
||||
count = sum([len(a) for a in answers])
|
||||
answerstonewusers.append(((option_date_from, option_date_to), count))
|
||||
sent = ([cachedsentiments[a['Id']] for al in answers for a in al])
|
||||
sentbad = len([1 for a in sent if a['compound'] < -0.05])
|
||||
sentneu = len([1 for a in sent if -0.05 <= a['compound'] <= 0.05])
|
||||
sentgood = len([1 for a in sent if a['compound'] > 0.05])
|
||||
sentimentstonewusers.append(((option_date_from, option_date_to), (sent, sentbad, sentneu, sentgood)))
|
||||
|
||||
# gen pdf for post histograms
|
||||
os.system(imgmagickcmd + " " + outputdir + "/posthist-i" + str(intervl) + ".pdf")
|
||||
|
||||
# plot active users diagram (number of distinct posting users per interval)
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in activeusercounts], [y for (x, y) in activeusercounts])
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=0)
|
||||
plt.ylim(bottom=0.001)
|
||||
plt.title("Active users")
|
||||
fig.savefig(outputdir + "activeusers.png", bbox_inches='tight')
|
||||
fig.savefig(outputdir + "activeusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
# plot answers to new users diagram
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in answerstonewusers], [y for (x, y) in answerstonewusers])
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=0.001)
|
||||
plt.title("#Answers to new users")
|
||||
fig.savefig(outputdir + "answerstonewusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
# plot sentiments of answers to new users diagram
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [b for (x, [y, b, n, g]) in sentimentstonewusers], label="Neg. answer")
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [n for (x, [y, b, n, g]) in sentimentstonewusers], label="Neu. answer")
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [g for (x, [y, b, n, g]) in sentimentstonewusers], label="Pos. answer")
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=0.001)
|
||||
plt.legend(loc="upper right")
|
||||
plt.title("Sentiments of answers to new users")
|
||||
fig.savefig(outputdir + "sentimentstonewusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user