diff --git a/calctoxdiff.py b/calctoxdiff.py index 2f263d7..c0fe678 100644 --- a/calctoxdiff.py +++ b/calctoxdiff.py @@ -37,11 +37,6 @@ def main(folder, intervl): plotbydateold(onlyfiles, oldfiles, outputdir, intervl) -class fake: - def __init__(self, p, s): - pass - - def g(srcfile, outputdir, intervals): print("ks global") avgss2 = readavgsentsingle(srcfile) diff --git a/its.py b/its.py index 1b11b3c..d1edbb6 100644 --- a/its.py +++ b/its.py @@ -13,7 +13,7 @@ from loader import load, dmt, cms from sentiments import readtoxleveltxt colors = ['red', 'green', 'blue', 'orange', 'deeppink'] -thresholds = [2, 3, 4, 5, 6] +thresholds = [3, 4, 5, 6] changedate = datetime.fromisoformat("2018-09-01T00:00:00") @@ -35,7 +35,9 @@ def main(folder, intervl): count = [] for (option_date_from, option_date_to) in intervals: if option_date_to <= datetime.fromisoformat("2015-01-01T00:00:00"): + datasingle.append(float("nan")) data.append(float("nan")) + count.append(float("nan")) continue print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) # avg sentiments @@ -50,11 +52,26 @@ def main(folder, intervl): data.append(avg) count.append(len(filtered)) + avgcount = np.mean([x for x in count if str(x) != "nan"]) + stdcount = np.std([x for x in count if str(x) != "nan"]) + for i in range(len(count)): + print(count[i]) + if count[i] == 45: + print("m " + str(avgcount)) + print("s " + str(stdcount)) + print("N " + str((count[i] - avgcount) / stdcount)) + if str(count[i]) == "nan" or np.abs((count[i] - avgcount) / stdcount) > 3: + datasingle[i] = float("nan") + data[i] = float("nan") + count[i] = float("nan") + # filter nan entries for i in range(len(data)): while i < len(data) and str(data[i]) == "nan": + del datasingle[i] del data[i] del intervals[i] + del count[i] print("Computing full ITS") t = np.reshape(np.array([i for i in range(len(datasingle)) for j in datasingle[i]]), (-1, 1)) @@ -79,10 +96,11 @@ def main(folder, intervl): thresp = [] print("Computing threshold ITS") for ti in thresholds: - print(1, changedate - relativedelta(months=ti)) - print(2, changedate + relativedelta(months=ti)) + # print(1, changedate - relativedelta(months=ti)) + # print(2, changedate + relativedelta(months=ti)) z = [(i, x) for (i, x) in zip(intervals, datasingle) if i[0] >= changedate - relativedelta(months=ti) and i[1] <= changedate + relativedelta(months=ti)] iv = [i for (i, x) in z] + # print("iv " + str(iv)) d = [x for (i, x) in z] t = np.reshape(np.array([i for i in range(len(d)) for j in d[i]]), (-1, 1)) x = np.reshape(np.array([(0 if iv[i][1] <= changedate else 1) for i in range(len(d)) for j in d[i]]), (-1, 1)) @@ -125,9 +143,9 @@ def main(folder, intervl): va = "top" plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va) plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")") - print("shape: " + str(np.shape(thresdata))) + # print("shape: " + str(np.shape(thresdata))) for (ti, t) in enumerate(thresholds): - print("shape1: " + str(np.shape(thresdata[ti]))) + # print("shape1: " + str(np.shape(thresdata[ti]))) plt.plot([thresiv[ti][i][0] for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")") plt.title("Average sentiments for new users") plt.xticks(rotation=90) @@ -149,7 +167,7 @@ if __name__ == "__main__": if not os.path.isdir(folder): print(folder + " is not a folder") sys.exit(1) - interval = 3 + interval = 1 if len(sys.argv) >= 3: if sys.argv[2].startswith("-i"): interval = sys.argv[2][2:] diff --git a/notes b/notes index 3cd468d..c070af4 100644 --- a/notes +++ b/notes @@ -36,8 +36,8 @@ http://lindenconsulting.org/documents/Weighted_TSA_Article.pdf ------- -outliner filtern 57 /2000 senitment values in its -threshold 2,3,4,5,6 monate vor und zurück in its neu kurven andere farben +outliner filtern 57 /2000 senitment values in its > done +threshold 2,3,4,5,6 monate vor und zurück in its neu kurven andere farben>done auswertung up downvotes und correlation mit sentiment activität neuer user vorher und nachher diff --git a/run.py b/run.py index 1524fb3..9e9cf4c 100644 --- a/run.py +++ b/run.py @@ -12,7 +12,7 @@ def main(folder, intervl): analyze_batch.main(folder, intervl) calctoxdiff.main(folder, intervl) posthist.main(folder, intervl) - its.main(folder, intervl) + its.main(folder, 1) pass