wip
This commit is contained in:
@@ -34,7 +34,7 @@ def calc_intervals(posts, months=3):
|
|||||||
intervals = []
|
intervals = []
|
||||||
while cdate < lastpost:
|
while cdate < lastpost:
|
||||||
nextmon = cdate.month + months
|
nextmon = cdate.month + months
|
||||||
nextquarter = cdate.replace(month=nextmon if nextmon <=12 else nextmon-12, year=cdate.year + (0 if nextmon <= 12 else 1))
|
nextquarter = cdate.replace(month=nextmon if nextmon <= 12 else nextmon - 12, year=cdate.year + (0 if nextmon <= 12 else 1))
|
||||||
print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
|
print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
|
||||||
intervals.append((cdate, nextquarter))
|
intervals.append((cdate, nextquarter))
|
||||||
cdate = nextquarter
|
cdate = nextquarter
|
||||||
|
|||||||
100
its.py
100
its.py
@@ -1,19 +1,20 @@
|
|||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
import os
|
||||||
|
import statsmodels.api as sm
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from sklearn.linear_model import LinearRegression
|
from dateutil.relativedelta import relativedelta
|
||||||
|
|
||||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
|
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
|
||||||
from loader import load, dmt, cms
|
from loader import load, dmt, cms
|
||||||
from sentiments import readtoxleveltxt
|
from sentiments import readtoxleveltxt
|
||||||
import statsmodels.api as sm
|
|
||||||
|
|
||||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||||
|
thresholds = [2, 3, 4, 5, 6]
|
||||||
|
changedate = datetime.fromisoformat("2018-09-01T00:00:00")
|
||||||
|
|
||||||
|
|
||||||
def main(folder, intervl):
|
def main(folder, intervl):
|
||||||
@@ -55,42 +56,13 @@ def main(folder, intervl):
|
|||||||
del data[i]
|
del data[i]
|
||||||
del intervals[i]
|
del intervals[i]
|
||||||
|
|
||||||
# print("Computing ITS ...")
|
|
||||||
# t = np.reshape(np.array([i for i in range(len(data))]), (-1, 1))
|
|
||||||
# x = np.reshape(np.array([(0 if option_date_to <= datetime.fromisoformat("2018-09-01T00:00:00") else 1) for (option_date_from, option_date_to) in intervals]), (-1, 1))
|
|
||||||
# X = np.array(t)
|
|
||||||
# X = np.concatenate((X, x), 1)
|
|
||||||
# X = np.concatenate((X, np.multiply(t, x)), 1)
|
|
||||||
# y = np.reshape(np.array(data), (-1, 1))
|
|
||||||
# # print("Xfin", X)
|
|
||||||
# # print("y", y)
|
|
||||||
# reg = LinearRegression()
|
|
||||||
# reg.fit(X, y)
|
|
||||||
# score = reg.score(X, y)
|
|
||||||
# coef = np.reshape(np.array(reg.coef_), (-1, 1))
|
|
||||||
# its = X.dot(coef) + reg.intercept_
|
|
||||||
# print("score: " + str(score))
|
|
||||||
# print("coef: " + str(coef))
|
|
||||||
# print("its: " + str(its))
|
|
||||||
|
|
||||||
print("Computing full ITS")
|
print("Computing full ITS")
|
||||||
t = np.reshape(np.array([i for i in range(len(datasingle)) for j in datasingle[i]]), (-1, 1))
|
t = np.reshape(np.array([i for i in range(len(datasingle)) for j in datasingle[i]]), (-1, 1))
|
||||||
x = np.reshape(np.array([(0 if intervals[i][1] <= datetime.fromisoformat("2018-09-01T00:00:00") else 1) for i in range(len(datasingle)) for j in datasingle[i]]), (-1, 1))
|
x = np.reshape(np.array([(0 if intervals[i][1] <= changedate else 1) for i in range(len(datasingle)) for j in datasingle[i]]), (-1, 1))
|
||||||
X = np.array(t)
|
X = np.array(t)
|
||||||
X = np.concatenate((X, x), 1)
|
X = np.concatenate((X, x), 1)
|
||||||
X = np.concatenate((X, np.multiply(t, x)), 1)
|
X = np.concatenate((X, np.multiply(t, x)), 1)
|
||||||
y = np.reshape(np.array([d for a in datasingle for d in a]), (-1, 1))
|
y = np.reshape(np.array([d for a in datasingle for d in a]), (-1, 1))
|
||||||
# print("Xfin", X)
|
|
||||||
# print("y", y)
|
|
||||||
# reg = LinearRegression()
|
|
||||||
# reg.fit(X, y)
|
|
||||||
# score2 = reg.score(X, y)
|
|
||||||
# coef2 = np.reshape(np.array(reg.coef_), (-1, 1))
|
|
||||||
# its2 = X.dot(coef2) + reg.intercept_
|
|
||||||
# print("intercept: " + str(reg.intercept_))
|
|
||||||
# print("score: " + str(score2))
|
|
||||||
# print("coef: " + str(coef2))
|
|
||||||
# print("its: " + str(its2))
|
|
||||||
X = sm.add_constant(X)
|
X = sm.add_constant(X)
|
||||||
res = sm.OLS(y, X).fit()
|
res = sm.OLS(y, X).fit()
|
||||||
p2 = res.pvalues
|
p2 = res.pvalues
|
||||||
@@ -101,29 +73,35 @@ def main(folder, intervl):
|
|||||||
with open(outputdir + "/summary-i" + str(intervl) + ".txt", "w") as file:
|
with open(outputdir + "/summary-i" + str(intervl) + ".txt", "w") as file:
|
||||||
file.write(str(res.summary()))
|
file.write(str(res.summary()))
|
||||||
|
|
||||||
# print("Computing segmented ITS before")
|
thresdata = []
|
||||||
# X = np.reshape(np.array([i for i in range(len(datasingle)) for j in datasingle[i] if intervals[i][1] <= datetime.fromisoformat("2018-09-01T00:00:00")]), (-1, 1))
|
thresols = []
|
||||||
# y = np.reshape(np.array([j for i in range(len(datasingle)) for j in datasingle[i] if intervals[i][1] <= datetime.fromisoformat("2018-09-01T00:00:00")]), (-1, 1))
|
thresiv = []
|
||||||
# reg = LinearRegression()
|
thresp = []
|
||||||
# reg.fit(X, y)
|
print("Computing threshold ITS")
|
||||||
# scoreb = reg.score(X, y)
|
for ti in thresholds:
|
||||||
# coefb = np.reshape(np.array(reg.coef_), (-1, 1))
|
print(1, changedate - relativedelta(months=ti))
|
||||||
# itsb = X.dot(coefb) + reg.intercept_
|
print(2, changedate + relativedelta(months=ti))
|
||||||
# print("scoreb: " + str(scoreb))
|
z = [(i, x) for (i, x) in zip(intervals, datasingle) if i[0] >= changedate - relativedelta(months=ti) and i[1] <= changedate + relativedelta(months=ti)]
|
||||||
# print("coefb: " + str(coefb))
|
iv = [i for (i, x) in z]
|
||||||
# print("itsb: " + str(itsb))
|
d = [x for (i, x) in z]
|
||||||
|
t = np.reshape(np.array([i for i in range(len(d)) for j in d[i]]), (-1, 1))
|
||||||
# print("Computing segmented ITS after")
|
x = np.reshape(np.array([(0 if iv[i][1] <= changedate else 1) for i in range(len(d)) for j in d[i]]), (-1, 1))
|
||||||
# X = np.reshape(np.array([i for i in range(len(datasingle)) for j in datasingle[i] if intervals[i][1] > datetime.fromisoformat("2018-09-01T00:00:00")]), (-1, 1))
|
X = np.array(t)
|
||||||
# y = np.reshape(np.array([j for i in range(len(datasingle)) for j in datasingle[i] if intervals[i][1] > datetime.fromisoformat("2018-09-01T00:00:00")]), (-1, 1))
|
X = np.concatenate((X, x), 1)
|
||||||
# reg = LinearRegression()
|
X = np.concatenate((X, np.multiply(t, x)), 1)
|
||||||
# reg.fit(X, y)
|
y = np.reshape(np.array([v for a in d for v in a]), (-1, 1))
|
||||||
# scorea = reg.score(X, y)
|
X = sm.add_constant(X)
|
||||||
# coefa = np.reshape(np.array(reg.coef_), (-1, 1))
|
res = sm.OLS(y, X).fit()
|
||||||
# itsa = X.dot(coefa) + reg.intercept_
|
tp = res.pvalues
|
||||||
# print("scorea: " + str(scorea))
|
thresp.append(tp)
|
||||||
# print("coefa: " + str(coefa))
|
# print("coef ols: " + str(res.params))
|
||||||
# print("itsa: " + str(itsa))
|
# print("sum ols: " + str(res.summary()))
|
||||||
|
coefthresols = np.reshape(np.array(res.params), (-1, 1))
|
||||||
|
thresols.append(X.dot(coefthresols))
|
||||||
|
thresiv.append(iv)
|
||||||
|
thresdata.append(d)
|
||||||
|
with open(outputdir + "/summary_threshold" + str(ti) + "-i" + str(intervl) + ".txt", "w") as file:
|
||||||
|
file.write(str(res.summary()))
|
||||||
|
|
||||||
fig = plt.figure(figsize=(16, 12))
|
fig = plt.figure(figsize=(16, 12))
|
||||||
plt.plot([i[0] for i in intervals], data, label="average sentiment")
|
plt.plot([i[0] for i in intervals], data, label="average sentiment")
|
||||||
@@ -146,13 +124,11 @@ def main(folder, intervl):
|
|||||||
else:
|
else:
|
||||||
va = "top"
|
va = "top"
|
||||||
plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
||||||
# plt.plot([i[0] for i in intervals], its, label="aggregated ITS (score " + str(score) + ")")
|
|
||||||
# plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2, label="single ITS (score " + str(score2) + ", p " + str(p2) + ")")
|
|
||||||
plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
||||||
# plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i] if intervals[i][1] <= datetime.fromisoformat("2018-09-01T00:00:00")], itsb,
|
print("shape: " + str(np.shape(thresdata)))
|
||||||
# label="segmented ITS b (score " + str(scoreb) + ")")
|
for (ti, t) in enumerate(thresholds):
|
||||||
# plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i] if intervals[i][1] > datetime.fromisoformat("2018-09-01T00:00:00")], itsa,
|
print("shape1: " + str(np.shape(thresdata[ti])))
|
||||||
# label="segmented ITS a (score " + str(scorea) + ")")
|
plt.plot([thresiv[ti][i][0] for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
|
||||||
plt.title("Average sentiments for new users")
|
plt.title("Average sentiments for new users")
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
plt.xlabel("months")
|
plt.xlabel("months")
|
||||||
|
|||||||
24
text/0_doc.tex
Normal file
24
text/0_doc.tex
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
\documentclass[a4paper,10pt]{article}
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
|
||||||
|
%opening
|
||||||
|
\title{}
|
||||||
|
\author{}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\maketitle
|
||||||
|
|
||||||
|
\begin{abstract}
|
||||||
|
|
||||||
|
\end{abstract}
|
||||||
|
|
||||||
|
\include{1_intro}
|
||||||
|
\include{2_relwork}
|
||||||
|
\include{3_method}
|
||||||
|
\include{4_datasets}
|
||||||
|
\include{5_results}
|
||||||
|
\include{6_discussion}
|
||||||
|
\include{7_conclusion}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
1
text/1_intro.tex
Normal file
1
text/1_intro.tex
Normal file
@@ -0,0 +1 @@
|
|||||||
|
\section{Introduction}
|
||||||
4
text/2_relwork.tex
Normal file
4
text/2_relwork.tex
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
\section{Related Work}
|
||||||
|
|
||||||
|
%tutorial: Bernal et al. \cite{bernal2017interrupted}
|
||||||
|
%TODO: literature analysis
|
||||||
10
text/bib.bib
Normal file
10
text/bib.bib
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
@article{bernal2017interrupted,
|
||||||
|
title={Interrupted time series regression for the evaluation of public health interventions: a tutorial},
|
||||||
|
author={Bernal, James Lopez and Cummins, Steven and Gasparrini, Antonio},
|
||||||
|
journal={International journal of epidemiology},
|
||||||
|
volume={46},
|
||||||
|
number={1},
|
||||||
|
pages={348--355},
|
||||||
|
year={2017},
|
||||||
|
publisher={Oxford University Press}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user