wip
This commit is contained in:
88
posthist.py
Normal file
88
posthist.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
import sys
|
||||
import os
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from collections import defaultdict
|
||||
from loader import load, dmt, cms
|
||||
import math
|
||||
from common import calc_intervals
|
||||
|
||||
def printnoln(text):
    """Print ``text`` without a trailing newline and flush stdout right away."""
    print(text, end='', flush=True)


def rprint(text):
    """Print ``text`` preceded by a carriage return (``'\\r'``)."""
    print('\r' + text)
|
||||
|
||||
# NOTE(review): presumably the number of days after registration during which
# a user still counts as "new"; not referenced in the visible code -- confirm.
DAYS_NEW_USER = 7
|
||||
# NOTE(review): presumably a threshold in years for treating a user as "old";
# not referenced in the visible code -- confirm.
OLD_USER_YEAR = 3
|
||||
|
||||
# Shared VADER sentiment analyser instance; computeToxLevel() calls its
# polarity_scores() method.
analyser = SentimentIntensityAnalyzer()
|
||||
# List of colour names; not referenced anywhere else in the visible code.
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||
|
||||
|
||||
def main(folder):
    """Save one post-count histogram per time interval for newly created users.

    Loads the dataset with ``load(folder)`` and derives the intervals with
    ``calc_intervals(posts)``. For every ``(from, to)`` interval it selects
    the users whose ``CreationDate`` lies in ``[from, to)``, counts the posts
    owned by each of those users and saves a histogram of the counts (log
    scaled y axis) to
    ``<folder>/output/posthist_<dataset>_<from>_<to>.png``, where the dates
    are formatted as ``dd-mm-YYYY``.

    Args:
        folder: Path of the dataset directory passed to ``load``. The
            ``output`` directory is created beneath it if missing.
    """
    users, posts, _firstcontrib, _sumcontrib = load(folder)
    intervals = calc_intervals(posts)

    # Create the output directory directly instead of shelling out to
    # "mkdir -p", so paths containing spaces or shell metacharacters work.
    outdir = os.path.join(folder, "output")
    os.makedirs(outdir, exist_ok=True)
    # normpath drops a trailing separator, so the dataset name used in the
    # file names is not empty when the folder is given as "data/".
    dataset = os.path.basename(os.path.normpath(folder))

    for (option_date_from, option_date_to) in intervals:
        date_from = option_date_from.strftime("%d-%m-%Y")
        date_to = option_date_to.strftime("%d-%m-%Y")
        print((date_from, date_to))

        # Users created in [option_date_from, option_date_to) and the posts
        # owned by them.
        newusers = set(dmt(users).filter(lambda u: option_date_from <= u['CreationDate'] < option_date_to, "filtering users by creation").map(lambda u: u['Id'], "getting user ids").getresults())
        newposts = dmt(posts).filter(lambda p: p['OwnerUserId'] in newusers, "filtering posts by users").getresults()

        # Number of posts per owning user id.
        postcounts = defaultdict(int)
        for post in newposts:
            postcounts[post['OwnerUserId']] += 1

        histfilename = os.path.join(outdir, "posthist_" + dataset + "_" + date_from + "_" + date_to)

        histdata = list(postcounts.values())
        fig = plt.figure(figsize=(16, 12))
        # Integer bin edges 0, 1, ..., max + 1: every count gets its own bin.
        # With edges ending at max, the last bin would be the closed interval
        # [max - 1, max] and merge the two highest counts. The "+ 2" also
        # yields one valid bin [0, 1] when there is no data at all.
        plt.hist(histdata, range(max(histdata, default=0) + 2))
        plt.yscale('log')
        # NOTE(review): a lower limit of 0 cannot be shown on a log axis;
        # matplotlib is expected to ignore it with a warning -- confirm and
        # drop the call if so.
        plt.ylim(bottom=0)
        plt.title("Histogram for user post count registered between " + date_from + " and " + date_to)
        fig.savefig(histfilename + ".png", bbox_inches='tight')
        plt.close(fig)
|
||||
|
||||
|
||||
def computeToxLevel(text):
    """Return the result of the module-level analyser's ``polarity_scores`` for ``text``."""
    scores = analyser.polarity_scores(text)
    return scores
|
||||
|
||||
|
||||
def flatmap(arr):
    """Flatten one level of nesting.

    Returns a new list holding the items of every sub-iterable of ``arr``,
    in order.
    """
    flattened = []
    for chunk in arr:
        flattened.extend(chunk)
    return flattened
|
||||
|
||||
|
||||
def dumptoxlevels(lvls, filename):
    """Write ``lvls`` to ``filename`` as Python source.

    The file consists of a ``from collections import defaultdict`` line, a
    blank line and one assignment ``toxlevels = <str(lvls)>``. The first
    occurrence of ``<class 'list'>`` in that text is replaced by ``list``.

    Args:
        lvls: Object whose ``str()`` form is written.
            NOTE(review): presumably a ``defaultdict(list)``, whose string
            form contains ``<class 'list'>`` -- confirm with the callers.
        filename: Path of the file to create or overwrite.
    """
    # Render first, so a failure here does not leave a truncated file behind.
    rendered = str(lvls).replace("<class 'list'>", "list", 1)
    # The output is Python source, which Python decodes as UTF-8 by default;
    # write UTF-8 explicitly instead of relying on the locale's encoding.
    with open(filename, "w", encoding="utf-8") as file:
        file.write("from collections import defaultdict\n\n")
        file.write("toxlevels = " + rendered + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: the first argument is the dataset folder.
    usage = sys.argv[0] + " <folder>"
    cli_args = sys.argv[1:]
    if not cli_args:
        # No folder given: show the usage line and exit with an error status.
        print(usage)
        sys.exit(1)

    folder = cli_args[0]
    if os.path.isdir(folder):
        main(folder)
    else:
        print(folder + " is not a folder")
        sys.exit(1)
|
||||
Reference in New Issue
Block a user