added its
This commit is contained in:
121
its.py
Normal file
121
its.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import os
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
from common import calc_intervals, imprt, printnoln, rprint, DAYS_NEW_USER
|
||||
from loader import load, dmt, cms
|
||||
|
||||
# NOTE(review): not referenced anywhere in the visible part of this script —
# confirm whether it is still needed before relying on it.
OLD_USER_PERCENTILE = 0.95
|
||||
|
||||
# Named matplotlib colours.
# NOTE(review): the visible plotting code never reads this list — confirm
# whether it is still needed.
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||
|
||||
|
||||
def main(folder, intervl):
    """Fit and plot an interrupted-time-series (ITS) model of answer sentiment.

    For every interval returned by ``calc_intervals(posts, intervl)`` the mean
    cached ``compound`` sentiment of the selected answers is computed.
    Intervals ending on or before 2015-01-01, and intervals without any
    selected answer, are dropped.  A linear regression on the terms
    ``t`` (interval index), ``x`` (0 up to 2018-09-01, 1 afterwards) and
    ``t * x`` is fitted to the remaining averages, and both the averages and
    the fitted curve are written to
    ``<folder>/output/its/average_sentiments.png``.

    Args:
        folder: Dataset directory; passed to ``load`` and expected to contain
            ``output/sentiments.py``.
        intervl: Interval length forwarded unchanged to ``calc_intervals``.
    """
    users, posts, firstcontrib, sumcontrib = load(folder)

    intervals = calc_intervals(posts, intervl)

    start = cms()
    printnoln("reading sentiments ...")
    cachedsentiments = imprt(folder + "/output/sentiments.py").answers
    rprint("reading sentiments ... took " + str(cms() - start) + "ms")

    # Create the directory from Python instead of shelling out to `mkdir -p`:
    # no shell parsing of `folder`, and a failure raises instead of being
    # silently ignored.
    outputdir = os.path.join(folder, "output", "its")
    os.makedirs(outputdir, exist_ok=True)

    # Parse the two fixed dates once instead of once per interval.
    analysis_start = datetime.fromisoformat("2015-01-01T00:00:00")
    step_date = datetime.fromisoformat("2018-09-01T00:00:00")

    data = []
    for (option_date_from, option_date_to) in intervals:
        if option_date_to <= analysis_start:
            # Placeholder keeps `data` aligned with `intervals`; removed below.
            data.append(float("nan"))
            continue
        print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))

        # Collect the compound sentiment of every answer created inside the
        # interval and at least DAYS_NEW_USER days after the first
        # contribution recorded for the owner of the enclosing post.
        # NOTE(review): the plot title says "new users" while this condition
        # keeps answers created *after* the DAYS_NEW_USER window — confirm the
        # comparison direction and that the post owner (not the answer owner)
        # is the intended user.
        filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
                                              for a in p['Answers']
                                              if option_date_from <= a['CreationDate'] < option_date_to
                                              and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= a['CreationDate']])
                    .filter(lambda p: p != [])
                    .reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
                    .getresults())
        avg = np.average(filtered) if len(filtered) > 0 else float("nan")
        data.append(avg)

    # Drop NaN entries from both lists in one pass so they stay aligned.
    kept = [(span, value) for span, value in zip(intervals, data) if not np.isnan(value)]
    intervals = [span for span, _ in kept]
    data = [value for _, value in kept]

    if not data:
        # Nothing to regress on; the code below would fail on data[0].
        print("no sentiment data available, skipping ITS")
        return

    print("Computing ITS ...")
    count = len(data)
    # t: running index of the interval.
    t = np.arange(count).reshape(-1, 1)
    # x: step indicator, 0 for intervals ending on or before the step date.
    x = np.array([0 if option_date_to <= step_date else 1
                  for (_, option_date_to) in intervals]).reshape(-1, 1)
    # The constant first column is kept so the printed coefficient vector
    # keeps its four rows; LinearRegression fits its own intercept by default.
    baseline = np.full((count, 1), data[0])
    X = np.concatenate((baseline, t, x, np.multiply(t, x)), 1)
    y = np.reshape(np.array(data), (-1, 1))

    reg = LinearRegression()
    reg.fit(X, y)
    score = reg.score(X, y)
    coef = np.reshape(np.array(reg.coef_), (-1, 1))
    # Use the model's own prediction (coefficients plus fitted intercept).
    # Adding data[0] in place of the intercept shifted the plotted line away
    # from the model that `score` describes.
    its = reg.predict(X)
    print("score: " + str(score))
    print("coef: " + str(coef))
    print("its: " + str(its))

    fig = plt.figure(figsize=(16, 12))
    interval_starts = [span[0] for span in intervals]
    plt.plot(interval_starts, data, label="average sentiment")
    plt.plot(interval_starts, its, label="ITS (score " + str(score) + ")")
    plt.title("Average sentiments for new users")
    plt.xticks(rotation=90)
    plt.xlabel("months")
    plt.ylabel("sentiment")
    plt.legend(loc="upper right")
    outfile = os.path.join(outputdir, "average_sentiments.png")
    plt.savefig(outfile, bbox_inches='tight')
    plt.close(fig)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point.  Arguments: <folder> and an optional "-i<n>" where
    # n is an integer from 1 to 12 (default 3) that is handed to main().
    usage = sys.argv[0] + " <folder>"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    folder = sys.argv[1]
    if not os.path.isdir(folder):
        print(folder + " is not a folder")
        sys.exit(1)

    interval = 3
    if len(sys.argv) >= 3:
        option = sys.argv[2]
        # Only the "-i" option is understood; reject anything else first.
        if not option.startswith("-i"):
            print("unknown parameter: " + option)
            sys.exit(1)
        try:
            interval = int(option[2:])
        except ValueError:
            print("-i: int required")
            sys.exit(1)
        if not 1 <= interval <= 12:
            print("-i: only 1 - 12")
            sys.exit(1)

    main(folder, interval)
|
||||
Reference in New Issue
Block a user