This commit is contained in:
wea_ondara
2020-04-08 12:28:52 +02:00
parent e926ea5b22
commit 813d5edf24
2 changed files with 73 additions and 45 deletions

103
common.py
View File

@@ -1,7 +1,7 @@
import importlib
from threading import Thread, Lock
import matplotlib.pyplot as plt
from datetime import datetime
from threading import Thread, Lock
from loader import dmt
@@ -10,37 +10,66 @@ rprint = lambda text: print('\r' + text)
# Threshold in days; presumably the account age below which a user counts as
# "new" -- TODO confirm against the callers that import it.
DAYS_NEW_USER = 7
# NOTE(review): presumably the ImageMagick executable name used when shelling
# out; its usage is not visible in this part of the file -- verify.
IMAGE_MAGICK = "magick"
# Reference timestamp (naive datetime). calc_intervals anchors its interval
# grid on it and difftime measures month offsets relative to it.
CHANGE_DATE = datetime.fromisoformat("2018-08-21T21:00:00")
def _shift_months(date, delta):
    """Return *date* moved by *delta* calendar months (negative moves back)."""
    # Work on a zero-based "months since year 0" index so that any delta,
    # including ones larger than a year, carries into the year correctly.
    year, month_index = divmod(date.year * 12 + (date.month - 1) + delta, 12)
    return date.replace(year=year, month=month_index + 1)


def calc_intervals(posts, months=3):
    """Build the list of ``months``-long intervals that span *posts*.

    The interval grid is anchored on ``CHANGE_DATE``: boundaries lie at
    ``CHANGE_DATE`` plus or minus whole multiples of ``months``, so the
    change date itself is always a boundary. The grid is extended backwards
    until it reaches the earliest ``CreationDate`` and forwards until it
    reaches the latest one. The first and the last grid cell are dropped
    because they are only partially covered by data.

    Args:
        posts: Sequence of mappings with a ``'CreationDate'`` entry whose
            values are comparable with ``CHANGE_DATE``. ``posts[0]`` is used
            as the seed of the reduction, so the sequence must not be empty.
        months: Positive length of one interval in calendar months.

    Returns:
        A list of ``(start, end)`` tuples in chronological order.

    Raises:
        ValueError: If ``months`` is smaller than 1 (the grid could never
            advance and the loops below would not terminate).
    """
    if months < 1:
        raise ValueError("months must be a positive number of months")

    # NOTE(review): dmt(...).reduce is a project helper; presumably it folds
    # the posts (first lambda), merges partial results (second lambda) and
    # seeds with the third callable -- confirm against loader.dmt.
    # The lambdas themselves pick the smallest / largest CreationDate.
    firstpost = dmt(posts).reduce(
        lambda acc, e: acc if acc < e['CreationDate'] else e['CreationDate'],
        lambda acc, e: acc if acc < e else e,
        lambda: posts[0]['CreationDate'],
        "firstpost").getresults()
    lastpost = dmt(posts).reduce(
        lambda acc, e: acc if acc > e['CreationDate'] else e['CreationDate'],
        lambda acc, e: acc if acc > e else e,
        lambda: posts[0]['CreationDate'],
        "lastpost").getresults()

    # Step the grid backwards from the change date until it no longer lies
    # after the first post.
    grid_start = CHANGE_DATE
    while firstpost < grid_start:
        grid_start = _shift_months(grid_start, -months)

    # Step the grid forwards from the change date until it no longer lies
    # before the last post.
    grid_end = CHANGE_DATE
    while lastpost > grid_end:
        grid_end = _shift_months(grid_end, months)

    intervals = []
    cdate = grid_start
    while cdate < grid_end:
        nextquarter = _shift_months(cdate, months)
        # ignore first and last intervals as there is only partial data
        if cdate > grid_start and nextquarter < grid_end:
            print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
            intervals.append((cdate, nextquarter))
        cdate = nextquarter
    return intervals
def difftime(date):
    """Return the signed number of calendar months from ``CHANGE_DATE`` to *date*.

    Only the ``year`` and ``month`` attributes are compared; day and time of
    day are ignored. The result is negative when *date* falls in an earlier
    month than ``CHANGE_DATE``.
    """
    years_apart = date.year - CHANGE_DATE.year
    months_apart = date.month - CHANGE_DATE.month
    return years_apart * 12 + months_apart
# print(str(difftime(datetime.fromisoformat("2018-11-21T21:00:00"))) + ", 3")
# print(str(difftime(datetime.fromisoformat("2018-05-21T21:00:00"))) + ", -3")
# print(str(difftime(datetime.fromisoformat("2019-11-21T21:00:00"))) + ", 15")
# print(str(difftime(datetime.fromisoformat("2017-05-21T21:00:00"))) + ", -15")
# print(str(difftime(datetime.fromisoformat("2020-05-21T21:00:00"))) + ", 21")
# print(str(difftime(datetime.fromisoformat("2016-11-21T21:00:00"))) + ", -21")
def imprt(file):
spec = importlib.util.spec_from_file_location("module.name", file)
foo = importlib.util.module_from_spec(spec)
@@ -48,22 +77,22 @@ def imprt(file):
return foo
class FigSaver():
    """Save figures on background threads and wait for them on demand.

    Each call to :meth:`save` starts one thread that writes the figure and
    then closes it. :meth:`join` blocks until every save started so far has
    finished.

    NOTE(review): Matplotlib documents that it is not thread-safe; whether
    calling ``savefig`` / ``plt.close`` from worker threads is safe depends
    on how the figures are used elsewhere -- confirm before relying on this.
    """

    def __init__(self):
        # Guards __threads, which save() and join() may touch concurrently.
        self.__lock = Lock()
        self.__threads = []

    def save(self, fig, path, **kwargs):
        """Start saving *fig* to *path* in the background.

        Args:
            fig: Object providing ``savefig(path, **kwargs)``.
            path: Destination handed to ``fig.savefig``.
            **kwargs: Forwarded unchanged to ``fig.savefig``.
        """
        thread = Thread(target=self.__dosave, args=(fig, path, kwargs))
        with self.__lock:
            # Register before starting so a concurrent join() cannot miss it.
            self.__threads.append(thread)
            thread.start()

    def __dosave(self, fig, path, kwargs):
        """Write the figure, then close it even if writing failed."""
        try:
            fig.savefig(path, **kwargs)
        finally:
            # Always release the figure; otherwise a failed save leaks it.
            plt.close(fig)

    def join(self):
        """Block until all saves started before this call have finished."""
        # Take the pending threads out under the lock, but wait for them
        # outside of it: holding the lock while joining would stall every
        # concurrent save() until all outstanding saves are done. Swapping in
        # a fresh list also stops finished threads from piling up.
        with self.__lock:
            pending, self.__threads = self.__threads, []
        for thread in pending:
            thread.join()
# class FigSaver():
# def __init__(self):
# self.__lock = Lock()
# self.__threads = []
#
# def save(self, fig, path, **kwargs):
# thread = Thread(target=self.__dosave, args=(fig, path, kwargs))
# with self.__lock:
# self.__threads.append(thread)
# thread.start()
#
# def __dosave(self, fig, path, kwargs):
# fig.savefig(path, **kwargs)
# plt.close(fig)
#
# def join(self):
# with self.__lock:
# for thread in self.__threads:
# thread.join()

15
its.py
View File

@@ -1,14 +1,13 @@
import sys
import matplotlib.pyplot as plt
import numpy as np
import os
import statsmodels.api as sm
import sys
from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, difftime
from loader import load, dmt, cms
from sentiments import readtoxleveltxt
@@ -58,7 +57,7 @@ def main(folder, intervl):
avgcount = np.mean([x for x in count if str(x) != "nan"])
stdcount = np.std([x for x in count if str(x) != "nan"])
for i in range(len(count)):
if str(count[i]) == "nan" or np.abs((count[i] - avgcount) / stdcount) > 3:
if str(count[i]) == "nan": # or np.abs((count[i] - avgcount) / stdcount) > 3:
datasingle[i] = float("nan")
data[i] = float("nan")
count[i] = float("nan")
@@ -120,7 +119,7 @@ def main(folder, intervl):
file.write(str(res.summary()))
fig = plt.figure(figsize=(16, 12))
plt.plot([i[0] for i in intervals], data, label="average sentiment")
plt.plot([difftime(i[0]) for i in intervals], data, label="average sentiment")
plt.grid(True)
for i in range(len(data)):
va = "center"
@@ -139,12 +138,12 @@ def main(folder, intervl):
va = "bottom"
else:
va = "top"
plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
plt.text(difftime(intervals[i][0]), data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
plt.plot([difftime(intervals[i][0]) for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
# print("shape: " + str(np.shape(thresdata)))
for (ti, t) in enumerate(thresholds):
# print("shape1: " + str(np.shape(thresdata[ti])))
plt.plot([thresiv[ti][i][0] for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
plt.plot([difftime(thresiv[ti][i][0]) for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
plt.title("Average sentiments for new users")
plt.xticks(rotation=90)
plt.xlabel("months")