This commit is contained in:
wea_ondara
2020-05-08 09:49:17 +02:00
parent 6b29d4791e
commit 0ef9ea524b

155
genitsexamples.py Normal file
View File

@@ -0,0 +1,155 @@
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import statsmodels.api as sm
import sys
from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, FIG_SIZE, difftime
from loader import load, dmt, cms
from sentiments import readtoxleveltxt
# Colour palette; not referenced by the active code visible in this file.
colors = [
    "red",
    "green",
    "blue",
    "orange",
    "deeppink",
]

# Window half-widths for the per-threshold ITS fits. Only the commented-out
# threshold variant inside main() reads this list.
thresholds = [
    3,
    4,
    5,
    6,
]

# Position of the intervention on the interval axis: main() treats an interval
# whose end is <= changedate as "before the change".
changedate = 0
def main(intervl=1):
    """Fit and plot an interrupted time series (ITS) on synthetic sentiment data.

    Steps:
      1. generate the example samples with ``genData()`` (one list per interval),
      2. average every interval and drop intervals without a usable average,
      3. fit a single OLS model on all individual samples with a constant, a
         time index, a post-change indicator and their interaction,
      4. write the model summary and a plot into ``itsexample/``.

    :param intervl: only used as a suffix in the output file names
        (``summary-i<intervl>.txt`` and ``average_sentiments-i<intervl>.png``).
    """
    jumpup = genData()
    all_intervals = [(i, i + 1) for i in range(-15, 16)]

    outputdir = "itsexample/"
    # Create the directory through the standard library instead of shelling
    # out to `mkdir -p`: works on every platform and involves no shell.
    os.makedirs(outputdir, exist_ok=True)

    # Intervals and sample groups are paired by position. A group is kept only
    # if its average is a real number (empty groups and NaN averages are
    # dropped together with their interval).
    intervals = []
    datasingle = []
    data = []
    for interval, (i, val) in zip(all_intervals, jumpup.items()):
        print(i)
        # avg sentiments
        avg = np.average(val) if len(val) > 0 else float("nan")
        if np.isnan(avg):
            continue
        intervals.append(interval)
        datasingle.append(val)
        data.append(avg)

    print("Computing full ITS")
    X, y = _its_design(intervals, datasingle)
    X = sm.add_constant(X)
    res = sm.OLS(y, X).fit()
    summary = str(res.summary())
    print("coef ols: " + str(res.params))
    print("sum ols: " + summary)
    # Fitted values of the model for every individual sample.
    coef2ols = np.reshape(np.array(res.params), (-1, 1))
    its2ols = X.dot(coef2ols)
    with open(os.path.join(outputdir, "summary-i" + str(intervl) + ".txt"), "w") as file:
        file.write(summary)

    # Disabled variant: one ITS fit per symmetric window around changedate.
    # thresdata = []
    # thresols = []
    # thresiv = []
    # thresp = []
    # print("Computing threshold ITS")
    # for ti in thresholds:
    #     # print(1, changedate - relativedelta(months=ti))
    #     # print(2, changedate + relativedelta(months=ti))
    #     z = [(i, x) for (i, x) in zip(intervals, datasingle) if i[0] >= changedate - ti and i[1] <= changedate + ti]
    #     iv = [i for (i, x) in z]
    #     # print("iv " + str(iv))
    #     d = [x for (i, x) in z]
    #     t = np.reshape(np.array([i for i in range(len(d)) for j in d[i]]), (-1, 1))
    #     x = np.reshape(np.array([(0 if iv[i][1] <= changedate else 1) for i in range(len(d)) for j in d[i]]), (-1, 1))
    #     X = np.array(t)
    #     X = np.concatenate((X, x), 1)
    #     X = np.concatenate((X, np.multiply(t, x)), 1)
    #     y = np.reshape(np.array([v for a in d for v in a]), (-1, 1))
    #     X = sm.add_constant(X)
    #     res = sm.OLS(y, X).fit()
    #     tp = res.pvalues
    #     thresp.append(tp)
    #     # print("coef ols: " + str(res.params))
    #     # print("sum ols: " + str(res.summary()))
    #     coefthresols = np.reshape(np.array(res.params), (-1, 1))
    #     thresols.append(X.dot(coefthresols))
    #     thresiv.append(iv)
    #     thresdata.append(d)
    #     with open(outputdir + "/summary_threshold" + str(ti) + "-i" + str(intervl) + ".txt", "w") as file:
    #         file.write(str(res.summary()))

    fig = plt.figure(figsize=FIG_SIZE)
    starts = [difftime(interval[0]) for interval in intervals]
    plt.plot(starts, data, label="average sentiment")
    plt.grid(True)
    # Annotate every average with its sample count; only the first label
    # carries the "n=" prefix.
    for i, (start, avg, samples) in enumerate(zip(starts, data, datasingle)):
        label = ("n=" if i == 0 else "") + str(len(samples))
        plt.text(start, avg, label, ha="center", va=_label_valign(data, i))
    # The fitted line has one value per sample, so each interval start is
    # repeated once per sample of that interval.
    fitted_x = [start for start, samples in zip(starts, datasingle) for _ in samples]
    plt.plot(fitted_x, its2ols, label="sm single ITS")
    # print("shape: " + str(np.shape(thresdata)))
    # for (ti, t) in enumerate(thresholds):
    #     # print("shape1: " + str(np.shape(thresdata[ti])))
    #     plt.plot([difftime(thresiv[ti][i][0]) for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
    plt.title("Average sentiments for new users")
    plt.xticks(rotation=90)
    plt.xlabel("months")
    plt.ylabel("sentiment")
    plt.legend(loc="upper left")
    outfile = os.path.join(outputdir, "average_sentiments-i" + str(intervl) + ".png")
    plt.savefig(outfile, bbox_inches='tight')
    plt.close(fig)


def _its_design(intervals, datasingle):
    """Build regressors and response for the single ITS regression.

    Every individual sample becomes one row. The returned ``X`` has the
    columns ``[t, x, t * x]`` where ``t`` is the position of the sample's
    interval and ``x`` is 0 for intervals ending at or before ``changedate``
    and 1 otherwise. The constant column is added by the caller.

    :param intervals: list of ``(start, end)`` tuples, aligned with datasingle.
    :param datasingle: list of sample lists, one per interval.
    :return: tuple ``(X, y)`` of 2-D arrays with one row per sample.
    """
    sizes = [len(samples) for samples in datasingle]
    t = np.repeat(np.arange(len(datasingle)), sizes).reshape(-1, 1)
    post_change = [0 if interval[1] <= changedate else 1 for interval in intervals]
    x = np.repeat(post_change, sizes).reshape(-1, 1)
    X = np.concatenate((t, x, np.multiply(t, x)), 1)
    y = np.reshape(np.array([v for samples in datasingle for v in samples]), (-1, 1))
    return X, y


def _label_valign(values, idx):
    """Choose the vertical alignment of the text label at ``values[idx]``.

    Local maxima get ``"bottom"`` (text above the point), local minima get
    ``"top"`` (text below), everything else ``"center"``. End points are
    compared with their single neighbour. A lone point has no neighbour and
    is centred instead of raising ``IndexError``.
    """
    last = len(values) - 1
    if last <= 0:
        return "center"
    current = values[idx]
    if idx == 0:
        return "bottom" if values[1] < current else "top"
    if idx == last:
        return "bottom" if values[last - 1] < current else "top"
    before, after = values[idx - 1], values[idx + 1]
    if before < current and after < current:
        return "bottom"
    if before > current and after > current:
        return "top"
    return "center"
def difftime(i):
    """Return ``i`` unchanged.

    This definition comes after the ``from common import ... difftime`` line
    at the top of the file, so within this module it replaces the imported
    function. main() calls it on interval start values to get plot x-values.
    """
    return i
def genData():
    """Generate synthetic sample groups with a level jump at key 0.

    Returns a dict keyed by the integers -15..15 (in that order). Each value
    is a list of identical floats: one ``random.random()`` draw ``r`` per key
    gives the level ``baseline + r / 20``, with baseline 0.10 for negative
    keys and 0.15 for keys >= 0. The list length is
    ``((20 + key) * 1337) % 200 + 200``, i.e. between 200 and 399.
    """

    def constant_series(key, baseline):
        # Exactly one random draw per key keeps the random stream consumption
        # in key order.
        noise = random.random()
        size = ((20 + key) * 1337) % 200 + 200
        return [baseline + noise / 20] * size

    jumpup = {}
    for key in range(-15, 16):
        baseline = 0.10 if key < 0 else 0.15
        jumpup[key] = constant_series(key, baseline)
    return jumpup
# Run the example with the default arguments when the file is executed as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    main()