wip
This commit is contained in:
155
genitsexamples.py
Normal file
155
genitsexamples.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import os
|
||||
import random
|
||||
import statsmodels.api as sm
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, FIG_SIZE, difftime
|
||||
from loader import load, dmt, cms
|
||||
from sentiments import readtoxleveltxt
|
||||
|
||||
# Plot colour palette; not referenced by any active code in this file.
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
# Window half-widths for per-threshold ITS fits; not referenced by any active
# code in this file.
thresholds = [3, 4, 5, 6]
# Position of the intervention on the interval axis: main() treats an interval
# whose upper bound is <= changedate as "before" and everything else as "after".
changedate = 0
|
||||
|
||||
|
||||
def _label_valign(data, i):
    """Choose the vertical alignment of the sample-count label at point ``i``.

    A local maximum gets its label above the point ("bottom"), a local
    minimum below it ("top"); anything else is centred. The first and last
    points are compared against their single neighbour.
    """
    last = len(data) - 1
    if last <= 0:
        # A lone point has no neighbour to compare against.
        return "center"
    if i == 0:
        return "bottom" if data[1] < data[0] else "top"
    if i == last:
        return "bottom" if data[last - 1] < data[last] else "top"
    if data[i - 1] < data[i] and data[i + 1] < data[i]:
        return "bottom"
    if data[i - 1] > data[i] and data[i + 1] > data[i]:
        return "top"
    return "center"


def main(intervl=1):
    """Fit and plot an interrupted time series (ITS) on generated example data.

    The samples returned by ``genData()`` are paired positionally with the
    unit intervals ``(-15, -14) .. (15, 16)``. An OLS model with the regressors
    ``[const, t, x, t * x]`` is fitted on the individual samples, where ``t``
    is the index of the sample's interval and ``x`` is 0 for intervals ending
    at or before ``changedate`` and 1 afterwards.

    Side effects: prints progress and the fit results, writes the OLS summary
    to ``itsexample/summary-i<intervl>.txt`` and the figure to
    ``itsexample/average_sentiments-i<intervl>.png``.

    Args:
        intervl: Only used as a suffix in the output file names.
    """
    jumpup = genData()
    all_intervals = [(i, i + 1) for i in range(-15, 16)]

    outputdir = "itsexample/"
    os.makedirs(outputdir, exist_ok=True)

    # Keep only intervals whose average is defined; empty (or NaN-containing)
    # buckets are dropped from all series in one pass.
    intervals = []
    datasingle = []
    data = []
    for interval, (key, val) in zip(all_intervals, jumpup.items()):
        print(key)
        avg = np.average(val) if len(val) > 0 else float("nan")
        if np.isnan(avg):
            continue
        intervals.append(interval)
        datasingle.append(val)
        data.append(avg)

    print("Computing full ITS")
    # One regression row per individual sample, not per interval average.
    t = np.reshape(
        np.array([i for i, vals in enumerate(datasingle) for _ in vals]),
        (-1, 1))
    x = np.reshape(
        np.array([0 if intervals[i][1] <= changedate else 1
                  for i, vals in enumerate(datasingle) for _ in vals]),
        (-1, 1))
    # Columns: time, post-change indicator, and their interaction.
    X = np.concatenate((t, x, np.multiply(t, x)), 1)
    y = np.reshape(np.array([d for vals in datasingle for d in vals]), (-1, 1))
    X = sm.add_constant(X)
    res = sm.OLS(y, X).fit()
    summary_text = str(res.summary())
    print("coef ols: " + str(res.params))
    print("sum ols: " + summary_text)
    coef2ols = np.reshape(np.array(res.params), (-1, 1))
    its2ols = X.dot(coef2ols)
    summary_path = os.path.join(outputdir, "summary-i" + str(intervl) + ".txt")
    with open(summary_path, "w") as file:
        file.write(summary_text)

    # NOTE: per-threshold ITS fits (restricting the data to a window around
    # changedate) are currently disabled and not computed here.

    fig = plt.figure(figsize=FIG_SIZE)
    plt.plot([difftime(iv[0]) for iv in intervals], data, label="average sentiment")
    plt.grid(True)
    for i, avg in enumerate(data):
        # Annotate each point with its sample count; only the first carries "n=".
        label = ("n=" if i == 0 else "") + str(len(datasingle[i]))
        plt.text(difftime(intervals[i][0]), avg, label,
                 ha="center", va=_label_valign(data, i))
    # The fitted values have one entry per sample, so repeat each interval's
    # x position once per sample to line up with its2ols.
    plt.plot([difftime(intervals[i][0])
              for i, vals in enumerate(datasingle) for _ in vals],
             its2ols, label="sm single ITS")
    plt.title("Average sentiments for new users")
    plt.xticks(rotation=90)
    plt.xlabel("months")
    plt.ylabel("sentiment")
    plt.legend(loc="upper left")
    outfile = os.path.join(outputdir, "average_sentiments-i" + str(intervl) + ".png")
    plt.savefig(outfile, bbox_inches='tight')
    plt.close(fig)
|
||||
|
||||
|
||||
def difftime(i):
    """Return ``i`` unchanged.

    The interval bounds in this script are already plain numbers, so no
    conversion is needed before plotting them on the x axis.
    """
    # NOTE(review): this definition shadows the ``difftime`` imported from
    # ``common`` at the top of the file - confirm that this is intended.
    return i
|
||||
|
||||
|
||||
def genData(seed=None):
    """Generate example data with an upward level jump at interval 0.

    For every integer ``i`` in ``-15 .. 15`` one random offset ``r`` is drawn
    and the bucket is filled with ``((20 + i) * 1337) % 200 + 200`` copies of
    the same value: ``0.10 + r / 20`` for ``i < 0`` and ``0.15 + r / 20`` for
    ``i >= 0``.

    Args:
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the module-level ``random`` generator is used.

    Returns:
        dict mapping each ``i`` (in ascending order) to its list of values.
    """
    rng = random if seed is None else random.Random(seed)
    jumpup = {}
    for i in range(-15, 16):
        # Exactly one draw per bucket, in ascending order of i.
        r = rng.random()
        level = 0.10 if i < 0 else 0.15
        size = ((20 + i) * 1337) % 200 + 200
        jumpup[i] = [level + r / 20] * size
    return jumpup
|
||||
|
||||
|
||||
# Run the example only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user