"""Fit and plot a segmented ("ITS") OLS regression on synthetic sentiment data.

The script generates per-month samples with a level jump at month 0
(``genData``), fits ``y ~ const + t + x + t*x`` where ``t`` is the interval
index and ``x`` flags intervals after ``changedate``, writes the statsmodels
summary to a text file and saves a plot of the interval means together with
the fitted line.
"""

import os
import random
import sys
from datetime import datetime
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from dateutil.relativedelta import relativedelta

# NOTE: the ``difftime`` imported from ``common`` is shadowed by the local
# ``difftime`` defined further down in this module.
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER, FIG_SIZE, difftime
from loader import load, dmt, cms
from sentiments import readtoxleveltxt

colors = ['red', 'green', 'blue', 'orange', 'deeppink']
# Window sizes for a per-threshold ITS variant; that variant is currently not
# implemented in this module, so the list is unused here.
thresholds = [3, 4, 5, 6]
# Intervals whose upper bound is <= changedate count as "before the change".
changedate = 0


def _label_valign(values, idx):
    """Return the vertical alignment for the text label of point ``idx``.

    Local maxima (and end points that lie above their single neighbour) get
    ``"bottom"`` so the label sits above the point; local minima get
    ``"top"``; everything else, including a lone point, gets ``"center"``.
    """
    last = len(values) - 1
    if last <= 0:
        # A single point has no neighbour to compare against.
        return "center"
    current = values[idx]
    if idx == 0:
        return "bottom" if values[1] < current else "top"
    if idx == last:
        return "bottom" if values[idx - 1] < current else "top"
    before, after = values[idx - 1], values[idx + 1]
    if before < current and after < current:
        return "bottom"
    if before > current and after > current:
        return "top"
    return "center"


def _design_matrix(samples, intervals):
    """Build the regressors ``[t, x, t*x]`` and the response column ``y``.

    Every observation in ``samples[k]`` gets ``t = k`` and ``x = 0`` when
    ``intervals[k][1] <= changedate``, otherwise ``x = 1``.  The constant
    column is not added here.
    """
    sizes = [len(s) for s in samples]
    t = np.repeat(np.arange(len(samples)), sizes).reshape(-1, 1)
    after_change = np.array([0 if iv[1] <= changedate else 1 for iv in intervals])
    x = np.repeat(after_change, sizes).reshape(-1, 1)
    X = np.concatenate((t, x, t * x), axis=1)
    y = np.concatenate([np.asarray(s) for s in samples]).reshape(-1, 1)
    return X, y


def _plot_results(intervals, means, samples, fitted, outfile):
    """Plot interval means with sample-count labels plus the fitted line."""
    fig = plt.figure(figsize=FIG_SIZE)
    starts = [difftime(iv[0]) for iv in intervals]
    plt.plot(starts, means, label="average sentiment")
    plt.grid(True)
    for idx, (start, mean, sample) in enumerate(zip(starts, means, samples)):
        # Only the first label carries the "n=" prefix.
        label = ("n=" if idx == 0 else "") + str(len(sample))
        plt.text(start, mean, label, ha="center", va=_label_valign(means, idx))
    # One x position per observation so it lines up with the fitted values.
    sizes = [len(s) for s in samples]
    plt.plot(np.repeat(starts, sizes), fitted, label="sm single ITS")
    plt.title("Average sentiments for new users")
    plt.xticks(rotation=90)
    plt.xlabel("months")
    plt.ylabel("sentiment")
    plt.legend(loc="upper left")
    plt.savefig(outfile, bbox_inches='tight')
    plt.close(fig)


def main(intervl=1):
    """Run the example: generate data, fit the OLS model, write outputs.

    Args:
        intervl: Only used as a suffix in the output file names
            (``summary-i<intervl>.txt`` and
            ``average_sentiments-i<intervl>.png``).

    Raises:
        ValueError: If no interval contains any usable sample.
    """
    jumpup = genData()
    all_intervals = [(i, i + 1) for i in range(-15, 16)]
    outputdir = "itsexample/"
    os.makedirs(outputdir, exist_ok=True)

    # Keep only intervals whose mean is defined (non-empty, no NaN mean).
    intervals = []
    samples = []
    means = []
    for interval, (month, values) in zip(all_intervals, jumpup.items()):
        print(month)
        mean = np.average(values) if len(values) > 0 else float("nan")
        if np.isnan(mean):
            continue
        intervals.append(interval)
        samples.append(values)
        means.append(mean)
    if not samples:
        raise ValueError("no non-empty intervals to fit")

    print("Computing full ITS")
    X, y = _design_matrix(samples, intervals)
    X = sm.add_constant(X)
    res = sm.OLS(y, X).fit()
    summary = str(res.summary())
    print("coef ols: " + str(res.params))
    print("sum ols: " + summary)
    fitted = X.dot(np.reshape(np.array(res.params), (-1, 1)))

    summary_path = os.path.join(outputdir, "summary-i" + str(intervl) + ".txt")
    with open(summary_path, "w") as fh:
        fh.write(summary)

    outfile = os.path.join(outputdir, "average_sentiments-i" + str(intervl) + ".png")
    _plot_results(intervals, means, samples, fitted, outfile)


def difftime(i):
    """Return ``i`` unchanged (identity mapping used for x coordinates)."""
    return i


def genData():
    """Generate synthetic samples keyed by month offset ``-15 .. 15``.

    Each month holds ``((20 + i) * 1337) % 200 + 200`` identical values equal
    to ``base + r / 20`` with a fresh ``r = random.random()`` per month;
    ``base`` is 0.10 for months below 0 and 0.15 from month 0 onwards.

    Returns:
        dict mapping month offset (int) to a list of floats.
    """
    jumpup = {}
    for i in range(-15, 16):
        base = 0.10 if i < 0 else 0.15
        r = random.random()
        size = ((20 + i) * 1337) % 200 + 200
        jumpup[i] = [base + r / 20] * size
    return jumpup


if __name__ == "__main__":
    main()