wip
This commit is contained in:
99
common.py
99
common.py
@@ -1,7 +1,7 @@
|
||||
import importlib
|
||||
from threading import Thread, Lock
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
from threading import Thread, Lock
|
||||
|
||||
from loader import dmt
|
||||
|
||||
@@ -10,37 +10,66 @@ rprint = lambda text: print('\r' + text)
|
||||
|
||||
DAYS_NEW_USER = 7
|
||||
IMAGE_MAGICK = "magick"
|
||||
# Reference timestamp: calc_intervals() steps its interval boundaries outward
# from this moment, and difftime() reports month offsets relative to it.
CHANGE_DATE = datetime.fromisoformat("2018-08-21T21:00:00")
|
||||
|
||||
|
||||
def _shift_months(moment, delta):
    """Return *moment* moved by *delta* calendar months (negative moves back).

    Day of month and time of day are kept as they are, so ``datetime.replace``
    raises ValueError if that day does not exist in the target month.
    """
    # Count months from zero so the year carry/borrow falls out of divmod.
    year, month_index = divmod(moment.year * 12 + moment.month - 1 + delta, 12)
    return moment.replace(year=year, month=month_index + 1)


def calc_intervals(posts, months=3):
    """Build consecutive (start, end) intervals of ``months`` months each.

    Interval boundaries lie on a grid of ``months``-sized steps anchored at
    CHANGE_DATE. The grid is extended backwards until it reaches the earliest
    post and forwards until it reaches the latest post; the outermost interval
    on each side is then left out because it is only partially covered by data.

    Args:
        posts: indexable, non-empty collection of mappings that each carry a
            'CreationDate' value comparable with CHANGE_DATE (``posts[0]`` is
            read to seed the reductions).
        months: length of one interval in calendar months, at least 1.

    Returns:
        List of ``(start, end)`` datetime tuples in ascending order.

    Raises:
        ValueError: if ``months`` is smaller than 1 (the boundary search could
            never terminate).
    """
    if months < 1:
        raise ValueError("months must be a positive integer")

    # NOTE(review): dmt(...).reduce(...) is a project helper; it is handed an
    # element fold, a combiner for partial results, an initial-value factory
    # and a label -- presumably a chunked/parallel reduce. Confirm in loader.
    firstpost = dmt(posts).reduce(
        lambda acc, e: acc if acc < e['CreationDate'] else e['CreationDate'],
        lambda acc, e: acc if acc < e else e,
        lambda: posts[0]['CreationDate'],
        "firstpost").getresults()
    lastpost = dmt(posts).reduce(
        lambda acc, e: acc if acc > e['CreationDate'] else e['CreationDate'],
        lambda acc, e: acc if acc > e else e,
        lambda: posts[0]['CreationDate'],
        "lastpost").getresults()

    # Step back from the change date until the grid point is not after the
    # earliest post; never ends up later than CHANGE_DATE itself.
    range_start = CHANGE_DATE
    while firstpost < range_start:
        range_start = _shift_months(range_start, -months)

    # Step forward from the change date until the grid point is not before the
    # latest post; never ends up earlier than CHANGE_DATE itself.
    range_end = CHANGE_DATE
    while lastpost > range_end:
        range_end = _shift_months(range_end, months)

    intervals = []
    cdate = range_start
    while cdate < range_end:
        nextquarter = _shift_months(cdate, months)
        # ignore first and last intervals as there is only partial data
        if cdate > range_start and nextquarter < range_end:
            print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
            intervals.append((cdate, nextquarter))
        cdate = nextquarter
    return intervals
|
||||
|
||||
|
||||
def difftime(date):
    """Return the signed number of calendar months from CHANGE_DATE to *date*.

    Only the year and month fields are compared; the day and the time of day
    play no role. The result is negative when *date* falls in an earlier
    month than CHANGE_DATE and zero when both share the same year and month.
    """
    years_apart = date.year - CHANGE_DATE.year
    months_apart = date.month - CHANGE_DATE.month
    return years_apart * 12 + months_apart
|
||||
|
||||
# print(str(difftime(datetime.fromisoformat("2018-11-21T21:00:00"))) + ", 3")
|
||||
# print(str(difftime(datetime.fromisoformat("2018-05-21T21:00:00"))) + ", -3")
|
||||
# print(str(difftime(datetime.fromisoformat("2019-11-21T21:00:00"))) + ", 15")
|
||||
# print(str(difftime(datetime.fromisoformat("2017-05-21T21:00:00"))) + ", -15")
|
||||
# print(str(difftime(datetime.fromisoformat("2020-05-21T21:00:00"))) + ", 21")
|
||||
# print(str(difftime(datetime.fromisoformat("2016-11-21T21:00:00"))) + ", -21")
|
||||
|
||||
|
||||
def imprt(file):
|
||||
spec = importlib.util.spec_from_file_location("module.name", file)
|
||||
foo = importlib.util.module_from_spec(spec)
|
||||
@@ -48,22 +77,22 @@ def imprt(file):
|
||||
return foo
|
||||
|
||||
|
||||
class FigSaver():
    """Write figures to disk on background threads.

    ``save`` returns immediately after starting a worker thread; ``join``
    blocks until every worker started so far has finished.

    NOTE(review): matplotlib documents itself as not thread-safe, so calling
    ``savefig``/``plt.close`` from several workers at once may misbehave --
    confirm this is acceptable for the backends in use.
    """

    def __init__(self):
        # Guards every access to the list of worker threads.
        self.__lock = Lock()
        # Workers that were started and not yet handed to join().
        self.__threads = []

    def save(self, fig, path, **kwargs):
        """Save *fig* to *path* in a new thread.

        Extra keyword arguments are forwarded unchanged to ``fig.savefig``.
        """
        thread = Thread(target=self.__dosave, args=(fig, path, kwargs))
        with self.__lock:
            # Start before publishing: join() must never see a thread that has
            # not been started, because Thread.join() raises RuntimeError then.
            thread.start()
            self.__threads.append(thread)

    def __dosave(self, fig, path, kwargs):
        """Worker body: write the figure, then release it."""
        try:
            fig.savefig(path, **kwargs)
        finally:
            # Close even when saving fails so the figure is not leaked.
            plt.close(fig)

    def join(self):
        """Wait for all workers started so far to finish."""
        with self.__lock:
            # Take ownership of the current batch so the lock is not held
            # while blocking and finished threads do not pile up.
            pending = self.__threads
            self.__threads = []
        for thread in pending:
            thread.join()
|
||||
# class FigSaver():
|
||||
# def __init__(self):
|
||||
# self.__lock = Lock()
|
||||
# self.__threads = []
|
||||
#
|
||||
# def save(self, fig, path, **kwargs):
|
||||
# thread = Thread(target=self.__dosave, args=(fig, path, kwargs))
|
||||
# with self.__lock:
|
||||
# self.__threads.append(thread)
|
||||
# thread.start()
|
||||
#
|
||||
# def __dosave(self, fig, path, kwargs):
|
||||
# fig.savefig(path, **kwargs)
|
||||
# plt.close(fig)
|
||||
#
|
||||
# def join(self):
|
||||
# with self.__lock:
|
||||
# for thread in self.__threads:
|
||||
# thread.join()
|
||||
|
||||
15
its.py
15
its.py
@@ -1,14 +1,13 @@
|
||||
import sys
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import os
|
||||
import statsmodels.api as sm
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
|
||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, difftime
|
||||
from loader import load, dmt, cms
|
||||
from sentiments import readtoxleveltxt
|
||||
|
||||
@@ -58,7 +57,7 @@ def main(folder, intervl):
|
||||
avgcount = np.mean([x for x in count if str(x) != "nan"])
|
||||
stdcount = np.std([x for x in count if str(x) != "nan"])
|
||||
for i in range(len(count)):
|
||||
if str(count[i]) == "nan" or np.abs((count[i] - avgcount) / stdcount) > 3:
|
||||
if str(count[i]) == "nan": # or np.abs((count[i] - avgcount) / stdcount) > 3:
|
||||
datasingle[i] = float("nan")
|
||||
data[i] = float("nan")
|
||||
count[i] = float("nan")
|
||||
@@ -120,7 +119,7 @@ def main(folder, intervl):
|
||||
file.write(str(res.summary()))
|
||||
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([i[0] for i in intervals], data, label="average sentiment")
|
||||
plt.plot([difftime(i[0]) for i in intervals], data, label="average sentiment")
|
||||
plt.grid(True)
|
||||
for i in range(len(data)):
|
||||
va = "center"
|
||||
@@ -139,12 +138,12 @@ def main(folder, intervl):
|
||||
va = "bottom"
|
||||
else:
|
||||
va = "top"
|
||||
plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
||||
plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
||||
plt.text(difftime(intervals[i][0]), data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
||||
plt.plot([difftime(intervals[i][0]) for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
||||
# print("shape: " + str(np.shape(thresdata)))
|
||||
for (ti, t) in enumerate(thresholds):
|
||||
# print("shape1: " + str(np.shape(thresdata[ti])))
|
||||
plt.plot([thresiv[ti][i][0] for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
|
||||
plt.plot([difftime(thresiv[ti][i][0]) for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
|
||||
plt.title("Average sentiments for new users")
|
||||
plt.xticks(rotation=90)
|
||||
plt.xlabel("months")
|
||||
|
||||
Reference in New Issue
Block a user