wip
This commit is contained in:
@@ -37,11 +37,6 @@ def main(folder, intervl):
|
|||||||
plotbydateold(onlyfiles, oldfiles, outputdir, intervl)
|
plotbydateold(onlyfiles, oldfiles, outputdir, intervl)
|
||||||
|
|
||||||
|
|
||||||
class fake:
|
|
||||||
def __init__(self, p, s):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def g(srcfile, outputdir, intervals):
|
def g(srcfile, outputdir, intervals):
|
||||||
print("ks global")
|
print("ks global")
|
||||||
avgss2 = readavgsentsingle(srcfile)
|
avgss2 = readavgsentsingle(srcfile)
|
||||||
|
|||||||
30
its.py
30
its.py
@@ -13,7 +13,7 @@ from loader import load, dmt, cms
|
|||||||
from sentiments import readtoxleveltxt
|
from sentiments import readtoxleveltxt
|
||||||
|
|
||||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||||
thresholds = [2, 3, 4, 5, 6]
|
thresholds = [3, 4, 5, 6]
|
||||||
changedate = datetime.fromisoformat("2018-09-01T00:00:00")
|
changedate = datetime.fromisoformat("2018-09-01T00:00:00")
|
||||||
|
|
||||||
|
|
||||||
@@ -35,7 +35,9 @@ def main(folder, intervl):
|
|||||||
count = []
|
count = []
|
||||||
for (option_date_from, option_date_to) in intervals:
|
for (option_date_from, option_date_to) in intervals:
|
||||||
if option_date_to <= datetime.fromisoformat("2015-01-01T00:00:00"):
|
if option_date_to <= datetime.fromisoformat("2015-01-01T00:00:00"):
|
||||||
|
datasingle.append(float("nan"))
|
||||||
data.append(float("nan"))
|
data.append(float("nan"))
|
||||||
|
count.append(float("nan"))
|
||||||
continue
|
continue
|
||||||
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
||||||
# avg sentiments
|
# avg sentiments
|
||||||
@@ -50,11 +52,26 @@ def main(folder, intervl):
|
|||||||
data.append(avg)
|
data.append(avg)
|
||||||
count.append(len(filtered))
|
count.append(len(filtered))
|
||||||
|
|
||||||
|
avgcount = np.mean([x for x in count if str(x) != "nan"])
|
||||||
|
stdcount = np.std([x for x in count if str(x) != "nan"])
|
||||||
|
for i in range(len(count)):
|
||||||
|
print(count[i])
|
||||||
|
if count[i] == 45:
|
||||||
|
print("m " + str(avgcount))
|
||||||
|
print("s " + str(stdcount))
|
||||||
|
print("N " + str((count[i] - avgcount) / stdcount))
|
||||||
|
if str(count[i]) == "nan" or np.abs((count[i] - avgcount) / stdcount) > 3:
|
||||||
|
datasingle[i] = float("nan")
|
||||||
|
data[i] = float("nan")
|
||||||
|
count[i] = float("nan")
|
||||||
|
|
||||||
# filter nan entries
|
# filter nan entries
|
||||||
for i in range(len(data)):
|
for i in range(len(data)):
|
||||||
while i < len(data) and str(data[i]) == "nan":
|
while i < len(data) and str(data[i]) == "nan":
|
||||||
|
del datasingle[i]
|
||||||
del data[i]
|
del data[i]
|
||||||
del intervals[i]
|
del intervals[i]
|
||||||
|
del count[i]
|
||||||
|
|
||||||
print("Computing full ITS")
|
print("Computing full ITS")
|
||||||
t = np.reshape(np.array([i for i in range(len(datasingle)) for j in datasingle[i]]), (-1, 1))
|
t = np.reshape(np.array([i for i in range(len(datasingle)) for j in datasingle[i]]), (-1, 1))
|
||||||
@@ -79,10 +96,11 @@ def main(folder, intervl):
|
|||||||
thresp = []
|
thresp = []
|
||||||
print("Computing threshold ITS")
|
print("Computing threshold ITS")
|
||||||
for ti in thresholds:
|
for ti in thresholds:
|
||||||
print(1, changedate - relativedelta(months=ti))
|
# print(1, changedate - relativedelta(months=ti))
|
||||||
print(2, changedate + relativedelta(months=ti))
|
# print(2, changedate + relativedelta(months=ti))
|
||||||
z = [(i, x) for (i, x) in zip(intervals, datasingle) if i[0] >= changedate - relativedelta(months=ti) and i[1] <= changedate + relativedelta(months=ti)]
|
z = [(i, x) for (i, x) in zip(intervals, datasingle) if i[0] >= changedate - relativedelta(months=ti) and i[1] <= changedate + relativedelta(months=ti)]
|
||||||
iv = [i for (i, x) in z]
|
iv = [i for (i, x) in z]
|
||||||
|
# print("iv " + str(iv))
|
||||||
d = [x for (i, x) in z]
|
d = [x for (i, x) in z]
|
||||||
t = np.reshape(np.array([i for i in range(len(d)) for j in d[i]]), (-1, 1))
|
t = np.reshape(np.array([i for i in range(len(d)) for j in d[i]]), (-1, 1))
|
||||||
x = np.reshape(np.array([(0 if iv[i][1] <= changedate else 1) for i in range(len(d)) for j in d[i]]), (-1, 1))
|
x = np.reshape(np.array([(0 if iv[i][1] <= changedate else 1) for i in range(len(d)) for j in d[i]]), (-1, 1))
|
||||||
@@ -125,9 +143,9 @@ def main(folder, intervl):
|
|||||||
va = "top"
|
va = "top"
|
||||||
plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
plt.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
||||||
plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
plt.plot([intervals[i][0] for i in range(len(datasingle)) for j in datasingle[i]], its2ols, label="sm single ITS (pvalues " + str(p2) + ")")
|
||||||
print("shape: " + str(np.shape(thresdata)))
|
# print("shape: " + str(np.shape(thresdata)))
|
||||||
for (ti, t) in enumerate(thresholds):
|
for (ti, t) in enumerate(thresholds):
|
||||||
print("shape1: " + str(np.shape(thresdata[ti])))
|
# print("shape1: " + str(np.shape(thresdata[ti])))
|
||||||
plt.plot([thresiv[ti][i][0] for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
|
plt.plot([thresiv[ti][i][0] for i in range(len(thresdata[ti])) for j in thresdata[ti][i]], thresols[ti], label="thres ITS " + str(t) + " months (pvalues " + str(thresp[ti]) + ")")
|
||||||
plt.title("Average sentiments for new users")
|
plt.title("Average sentiments for new users")
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
@@ -149,7 +167,7 @@ if __name__ == "__main__":
|
|||||||
if not os.path.isdir(folder):
|
if not os.path.isdir(folder):
|
||||||
print(folder + " is not a folder")
|
print(folder + " is not a folder")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
interval = 3
|
interval = 1
|
||||||
if len(sys.argv) >= 3:
|
if len(sys.argv) >= 3:
|
||||||
if sys.argv[2].startswith("-i"):
|
if sys.argv[2].startswith("-i"):
|
||||||
interval = sys.argv[2][2:]
|
interval = sys.argv[2][2:]
|
||||||
|
|||||||
4
notes
4
notes
@@ -36,8 +36,8 @@ http://lindenconsulting.org/documents/Weighted_TSA_Article.pdf
|
|||||||
|
|
||||||
-------
|
-------
|
||||||
|
|
||||||
outliner filtern 57 /2000 senitment values in its
|
outliner filtern 57 /2000 senitment values in its > done
|
||||||
threshold 2,3,4,5,6 monate vor und zurück in its neu kurven andere farben
|
threshold 2,3,4,5,6 monate vor und zurück in its neu kurven andere farben>done
|
||||||
auswertung up downvotes und correlation mit sentiment
|
auswertung up downvotes und correlation mit sentiment
|
||||||
activität neuer user vorher und nachher
|
activität neuer user vorher und nachher
|
||||||
|
|
||||||
|
|||||||
2
run.py
2
run.py
@@ -12,7 +12,7 @@ def main(folder, intervl):
|
|||||||
analyze_batch.main(folder, intervl)
|
analyze_batch.main(folder, intervl)
|
||||||
calctoxdiff.main(folder, intervl)
|
calctoxdiff.main(folder, intervl)
|
||||||
posthist.main(folder, intervl)
|
posthist.main(folder, intervl)
|
||||||
its.main(folder, intervl)
|
its.main(folder, 1)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user