wip
This commit is contained in:
103
common.py
103
common.py
@@ -1,7 +1,7 @@
|
|||||||
import importlib
|
import importlib
|
||||||
from threading import Thread, Lock
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from datetime import datetime
|
||||||
|
from threading import Thread, Lock
|
||||||
|
|
||||||
from loader import dmt
|
from loader import dmt
|
||||||
|
|
||||||
@@ -10,37 +10,66 @@ rprint = lambda text: print('\r' + text)
|
|||||||
|
|
||||||
DAYS_NEW_USER = 7
|
DAYS_NEW_USER = 7
|
||||||
IMAGE_MAGICK = "magick"
|
IMAGE_MAGICK = "magick"
|
||||||
|
CHANGE_DATE = datetime.fromisoformat("2018-08-21T21:00:00")
|
||||||
|
|
||||||
|
|
||||||
def _shift_months(moment, delta):
    """Return ``moment`` moved by ``delta`` calendar months (may be negative)."""
    # Work on a zero-based absolute month count so that year roll-over is
    # handled for any step size, not only for steps of up to twelve months.
    year, month_index = divmod(moment.year * 12 + (moment.month - 1) + delta, 12)
    return moment.replace(year=year, month=month_index + 1)


def calc_intervals(posts, months=3):
    """Build ``months``-long intervals aligned to ``CHANGE_DATE``.

    The earliest and latest ``'CreationDate'`` values in ``posts`` are
    determined via ``dmt(posts).reduce(...)``. A grid of boundaries spaced
    ``months`` calendar months apart and passing through ``CHANGE_DATE`` is
    then extended backwards until it reaches or passes the earliest post and
    forwards until it reaches or passes the latest post. The first and the
    last interval on that grid are dropped because they contain only partial
    data.

    Args:
        posts: indexable collection of records exposing ``'CreationDate'``.
            Presumably datetimes comparable with ``CHANGE_DATE`` -- confirm
            against the loader.
        months: interval length in calendar months; must be at least 1.

    Returns:
        A list of ``(start, end)`` tuples in chronological order.

    Raises:
        ValueError: if ``months`` is smaller than 1 (the alignment loops
            could never terminate normally in that case).
    """
    if months < 1:
        raise ValueError("months must be a positive number of months")

    firstpost = dmt(posts).reduce(
        lambda acc, e: acc if acc < e['CreationDate'] else e['CreationDate'],
        lambda acc, e: acc if acc < e else e,
        lambda: posts[0]['CreationDate'],
        "firstpost",
    ).getresults()
    lastpost = dmt(posts).reduce(
        lambda acc, e: acc if acc > e['CreationDate'] else e['CreationDate'],
        lambda acc, e: acc if acc > e else e,
        lambda: posts[0]['CreationDate'],
        "lastpost",
    ).getresults()

    # Step backwards from the change date until the boundary is not after
    # the earliest post.
    start = CHANGE_DATE
    while firstpost < start:
        start = _shift_months(start, -months)

    # Step forwards from the change date until the boundary is not before
    # the latest post.
    end = CHANGE_DATE
    while lastpost > end:
        end = _shift_months(end, months)

    intervals = []
    cdate = start
    while cdate < end:
        nextquarter = _shift_months(cdate, months)
        # ignore first and last intervals as there is only partial data
        if cdate > start and nextquarter < end:
            print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
            intervals.append((cdate, nextquarter))
        cdate = nextquarter
    return intervals
|
||||||
|
|
||||||
|
|
||||||
|
def difftime(date):
    """Return the whole-month offset of ``date`` relative to ``CHANGE_DATE``.

    Only the year and month fields are compared; the day and the time of day
    are ignored. The result is negative for months before the change month,
    zero for the change month itself and positive for later months.
    """
    year_gap = date.year - CHANGE_DATE.year
    month_gap = date.month - CHANGE_DATE.month
    # A negative month gap simply subtracts from the year term, so no
    # separate borrow step is required.
    return year_gap * 12 + month_gap
|
||||||
|
|
||||||
|
# print(str(difftime(datetime.fromisoformat("2018-11-21T21:00:00"))) + ", 3")
|
||||||
|
# print(str(difftime(datetime.fromisoformat("2018-05-21T21:00:00"))) + ", -3")
|
||||||
|
# print(str(difftime(datetime.fromisoformat("2019-11-21T21:00:00"))) + ", 15")
|
||||||
|
# print(str(difftime(datetime.fromisoformat("2017-05-21T21:00:00"))) + ", -15")
|
||||||
|
# print(str(difftime(datetime.fromisoformat("2020-05-21T21:00:00"))) + ", 21")
|
||||||
|
# print(str(difftime(datetime.fromisoformat("2016-11-21T21:00:00"))) + ", -21")
|
||||||
|
|
||||||
|
|
||||||
def imprt(file):
|
def imprt(file):
|
||||||
spec = importlib.util.spec_from_file_location("module.name", file)
|
spec = importlib.util.spec_from_file_location("module.name", file)
|
||||||
foo = importlib.util.module_from_spec(spec)
|
foo = importlib.util.module_from_spec(spec)
|
||||||
@@ -48,22 +77,22 @@ def imprt(file):
|
|||||||
return foo
|
return foo
|
||||||
|
|
||||||
|
|
||||||
class FigSaver():
|
# class FigSaver():
|
||||||
def __init__(self):
|
# def __init__(self):
|
||||||
self.__lock = Lock()
|
# self.__lock = Lock()
|
||||||
self.__threads = []
|
# self.__threads = []
|
||||||
|
#
|
||||||
def save(self, fig, path, **kwargs):
|
# def save(self, fig, path, **kwargs):
|
||||||
thread = Thread(target=self.__dosave, args=(fig, path, kwargs))
|
# thread = Thread(target=self.__dosave, args=(fig, path, kwargs))
|
||||||
with self.__lock:
|
# with self.__lock:
|
||||||
self.__threads.append(thread)
|
# self.__threads.append(thread)
|
||||||
thread.start()
|
# thread.start()
|
||||||
|
#
|
||||||
def __dosave(self, fig, path, kwargs):
|
# def __dosave(self, fig, path, kwargs):
|
||||||
fig.savefig(path, **kwargs)
|
# fig.savefig(path, **kwargs)
|
||||||
plt.close(fig)
|
# plt.close(fig)
|
||||||
|
#
|
||||||
def join(self):
|
# def join(self):
|
||||||
with self.__lock:
|
# with self.__lock:
|
||||||
for thread in self.__threads:
|
# for thread in self.__threads:
|
||||||
thread.join()
|
# thread.join()
|
||||||
|
|||||||
15
its.py
15
its.py
@@ -1,14 +1,13 @@
|
|||||||
import sys
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
import os
|
||||||
import statsmodels.api as sm
|
import statsmodels.api as sm
|
||||||
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
|
|
||||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
|
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, difftime
|
||||||
from loader import load, dmt, cms
|
from loader import load, dmt, cms
|
||||||
from sentiments import readtoxleveltxt
|
from sentiments import readtoxleveltxt
|
||||||
|
|
||||||
@@ -58,7 +57,7 @@ def main(folder, intervl):
|
|||||||
avgcount = np.mean([x for x in count if str(x) != "nan"])
|
avgcount = np.mean([x for x in count if str(x) != "nan"])
|
||||||
stdcount = np.std([x for x in count if str(x) != "nan"])
|
stdcount = np.std([x for x in count if str(x) != "nan"])
|
||||||
for i in range(len(count)):
|
for i in range(len(count)):
|
||||||
if str(count[i]) == "nan" or np.abs((count[i] - avgcount) / stdcount) > 3:
|
if str(count[i]) == "nan": # or np.abs((count[i] - avgcount) / stdcount) > 3:
|
||||||
datasingle[i] = float("nan")
|
datasingle[i] = float("nan")
|
||||||
data[i] = float("nan")
|
data[i] = float("nan")
|
||||||
count[i] = float("nan")
|
count[i] = float("nan")
|
||||||
@@ -120,7 +119,7 @@ def main(folder, intervl):
|
|||||||
file.write(str(res.summary()))
|
file.write(str(res.summary()))
|
||||||
|
|
||||||
fig = plt.figure(figsize=(16, 12))
|
fig = plt.figure(figsize=(16, 12))
|
||||||
plt.plot([i[0] for i in intervals], data, label="average sentiment")
|
plt.plot([difftime(i[0]) for i in intervals], data, label="average sentiment")
|
||||||
plt.grid(True)
|
plt.grid(True)
|
||||||
for i in range(len(data)):
|
for i in range(len(data)):
|
||||||
va = "center"
|
va = "center"
|
||||||
@@ -139,12 +138,12 @@ def main(folder, intervl):
|
|||||||
va = "bottom"
|
va = "bottom"
|
||||||
else:
|
else:
|
||||||
va = "top"
|
va = "top"
|
||||||
plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
plt.text(difftime(intervals[i][0]), data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
||||||
plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
plt.plot([difftime(intervals[i][0]) for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
||||||
# print("shape: " + str(np.shape(thresdata)))
|
# print("shape: " + str(np.shape(thresdata)))
|
||||||
for (ti, t) in enumerate(thresholds):
|
for (ti, t) in enumerate(thresholds):
|
||||||
# print("shape1: " + str(np.shape(thresdata[ti])))
|
# print("shape1: " + str(np.shape(thresdata[ti])))
|
||||||
plt.plot([thresiv[ti][i][0] for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
|
plt.plot([difftime(thresiv[ti][i][0]) for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
|
||||||
plt.title("Average sentiments for new users")
|
plt.title("Average sentiments for new users")
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
plt.xlabel("months")
|
plt.xlabel("months")
|
||||||
|
|||||||
Reference in New Issue
Block a user