wip
This commit is contained in:
5
its.py
5
its.py
@@ -55,11 +55,6 @@ def main(folder, intervl):
|
||||
avgcount = np.mean([x for x in count if str(x) != "nan"])
|
||||
stdcount = np.std([x for x in count if str(x) != "nan"])
|
||||
for i in range(len(count)):
|
||||
print(count[i])
|
||||
if count[i] == 45:
|
||||
print("m " + str(avgcount))
|
||||
print("s " + str(stdcount))
|
||||
print("N " + str((count[i] - avgcount) / stdcount))
|
||||
if str(count[i]) == "nan" or np.abs((count[i] - avgcount) / stdcount) > 3:
|
||||
datasingle[i] = float("nan")
|
||||
data[i] = float("nan")
|
||||
|
||||
@@ -100,7 +100,7 @@ def mapuser(item):
|
||||
|
||||
|
||||
def mapQuestion(item):
|
||||
tags = ['Id', 'CreationDate', 'Body', 'Title', 'OwnerUserId', 'OwnerDisplayName']
|
||||
tags = ['Id', 'CreationDate', 'Body', 'Title', 'OwnerUserId', 'OwnerDisplayName', 'Score']
|
||||
datetags = ['CreationDate']
|
||||
question = {tag: getTag(item, tag) for tag in tags}
|
||||
for tag in datetags:
|
||||
@@ -110,7 +110,7 @@ def mapQuestion(item):
|
||||
|
||||
|
||||
def mapAnswer(item):
|
||||
tags = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId']
|
||||
tags = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId', 'Score']
|
||||
datetags = ['CreationDate']
|
||||
answer = {tag: getTag(item, tag) for tag in tags}
|
||||
for tag in datetags:
|
||||
|
||||
4
notes
4
notes
@@ -38,8 +38,8 @@ http://lindenconsulting.org/documents/Weighted_TSA_Article.pdf
|
||||
|
||||
outlier filtern 57/2000 sentiment values in its > done
|
||||
threshold 2,3,4,5,6 monate vor und zurück in its neu kurven andere farben>done
|
||||
auswertung up downvotes und correlation mit sentiment
|
||||
Aktivität neuer User vorher und nachher
|
||||
auswertung up downvotes und correlation mit sentiment >done
|
||||
Aktivität neuer User vorher und nachher > done
|
||||
|
||||
|
||||
|
||||
|
||||
37
posthist.py
37
posthist.py
@@ -24,6 +24,7 @@ def main(folder, intervl):
|
||||
activeusercounts = []
|
||||
answerstonewusers = []
|
||||
sentimentstonewusers = []
|
||||
activitynewusers = []
|
||||
imgmagickcmd = IMAGE_MAGICK
|
||||
for (option_date_from, option_date_to) in intervals:
|
||||
print(option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y"))
|
||||
@@ -37,9 +38,21 @@ def main(folder, intervl):
|
||||
for p in newposts:
|
||||
postcounts[p['OwnerUserId']].append(p)
|
||||
i = i + 1
|
||||
# for a in p['Answers']:
|
||||
# postcounts[p['OwnerUserId']].append(a)
|
||||
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
|
||||
activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys())))
|
||||
|
||||
activitynewusersinmonth = defaultdict(int)
|
||||
for p in newposts:
|
||||
if firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate']:
|
||||
activitynewusersinmonth[p['OwnerUserId']] += 1
|
||||
for a in p['Answers']:
|
||||
if firstcontrib[a['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']:
|
||||
activitynewusersinmonth[p['OwnerUserId']] += 1
|
||||
activitysum = sum(activitynewusersinmonth.values())
|
||||
activitynewusers.append(((option_date_from, option_date_to), activitysum / len(activitynewusersinmonth)))
|
||||
|
||||
histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") + "-i" + str(intervl)
|
||||
|
||||
histdata = [pc for pc in postcounts.values()]
|
||||
@@ -73,8 +86,10 @@ def main(folder, intervl):
|
||||
# plot posts diagram
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in activeusercounts], [y for (x, y) in activeusercounts])
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('#active users')
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=0.001)
|
||||
plt.ylim(bottom=1)
|
||||
plt.title("Active users")
|
||||
fig.savefig(outputdir + "activeusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
@@ -82,9 +97,11 @@ def main(folder, intervl):
|
||||
# plot answers to new users diagram
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in answerstonewusers], [y for (x, y) in answerstonewusers])
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('#answers per question of a new user')
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=0.001)
|
||||
plt.title("#Answers to new users")
|
||||
plt.ylim(bottom=1)
|
||||
plt.title("Answers to new users")
|
||||
fig.savefig(outputdir + "answerstonewusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
@@ -93,13 +110,25 @@ def main(folder, intervl):
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [b for (x, [y, b, n, g]) in sentimentstonewusers], label="Neg. answer")
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [n for (x, [y, b, n, g]) in sentimentstonewusers], label="Neu. answer")
|
||||
plt.plot([x[0] for (x, y) in sentimentstonewusers], [g for (x, [y, b, n, g]) in sentimentstonewusers], label="Pos. answer")
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('sentiment')
|
||||
plt.yscale('log')
|
||||
plt.ylim(bottom=0.001)
|
||||
plt.ylim(bottom=1)
|
||||
plt.legend(loc="upper right")
|
||||
plt.title("Sentiments of answers to new users")
|
||||
fig.savefig(outputdir + "sentimentstonewusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
# plot activity for new users
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
plt.plot([x[0] for (x, y) in activitynewusers], [y for x, y in activitynewusers], label="activity")
|
||||
plt.xlabel('time')
|
||||
plt.ylabel('#questions or answers created by a new user')
|
||||
plt.legend(loc="upper right")
|
||||
plt.title("Average activity per new user")
|
||||
fig.savefig(outputdir + "activitynewusers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# execute only if run as a script
|
||||
|
||||
120
votes.py
Normal file
120
votes.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import sys
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import os
|
||||
import statsmodels.api as sm
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
|
||||
from loader import load, dmt, cms
|
||||
from sentiments import readtoxleveltxt
|
||||
|
||||
# Colour names, presumably intended for plot lines.
# NOTE(review): not referenced by any code visible in votes.py - confirm whether it is still needed.
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||
# NOTE(review): unit and meaning are not shown here (presumably month offsets - TODO confirm);
# not referenced by any code visible in votes.py.
thresholds = [3, 4, 5, 6]
|
||||
# Naive (timezone-less) datetime for 2018-09-01 00:00:00.
# NOTE(review): what changed on this date is not documented here, and the value is not
# referenced by any code visible in votes.py - confirm before relying on it.
changedate = datetime.fromisoformat("2018-09-01T00:00:00")
|
||||
|
||||
|
||||
def _mean_or_nan(values):
    """Return the arithmetic mean of ``values``, or NaN when ``values`` is empty."""
    return np.mean(values) if len(values) > 0 else float("nan")


def _label_valign(series, idx):
    """Choose the vertical alignment of the text label drawn at ``series[idx]``.

    Local maxima get "bottom", local minima get "top", everything else
    "center". The first and last points only have one neighbour to compare
    with. A series with fewer than two points has no neighbour at all and is
    always "center".
    """
    last = len(series) - 1
    if last < 1:
        return "center"
    here = series[idx]
    if idx == 0:
        return "bottom" if series[1] < here else "top"
    if idx == last:
        return "bottom" if series[last - 1] < here else "top"
    before, after = series[idx - 1], series[idx + 1]
    if before < here and after < here:
        return "bottom"
    if before > here and after > here:
        return "top"
    return "center"


def main(folder, intervl):
    """Plot the average answer sentiment against the average post score per interval.

    Loads the data set from ``folder``, reads the cached sentiments from
    ``<folder>/output/sentiments.txt`` and, for every interval returned by
    ``calc_intervals(posts, intervl)``, collects

    * the integer ``Score`` of every post created inside the interval, and
    * the ``compound`` sentiment of every answer created inside the interval.

    Both series are averaged per interval and drawn into one figure with two
    y-axes, saved as ``<folder>/output/votes/average_sentiments-i<intervl>.png``.
    Intervals without any value are plotted as NaN (a gap in the line).

    Args:
        folder: Path of the data set directory.
        intervl: Interval length passed to ``calc_intervals``; also used in
            the output file name.
    """
    _, posts, firstcontrib, _ = load(folder)

    intervals = calc_intervals(posts, intervl)

    start = cms()
    printnoln("reading sentiments ...")
    (_, cachedsentiments) = readtoxleveltxt(folder + "/output/sentiments.txt")
    rprint("reading sentiments ... took " + str(cms() - start) + "ms")

    outputdir = folder + "/output/votes/"
    # Create the directory without going through a shell: ``folder`` comes
    # from the command line and may contain spaces or shell metacharacters.
    os.makedirs(outputdir, exist_ok=True)

    datasingle = []
    scoresingle = []
    for (option_date_from, option_date_to) in intervals:
        print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
        # NOTE(review): both filters keep items created at least DAYS_NEW_USER
        # days AFTER the owner's first contribution (``<=``), whereas the plot
        # title says "new users" - confirm which comparison is intended.
        # scores of the posts created in this interval
        scores = (dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to
                                    and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= p['CreationDate'])
                  .map(lambda p: int(p['Score']))
                  .getresults())
        # compound sentiments of the answers created in this interval,
        # flattened into one list across all posts
        filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
                                              for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to
                                              and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= a['CreationDate']])
                    .filter(lambda p: p != [])
                    .reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
                    .getresults())
        scoresingle.append(scores)
        datasingle.append(filtered)

    print("Plotting ...")
    # Keep the raw per-interval lists (their length is shown as "n=..." below)
    # and turn empty intervals into NaN only when averaging.
    xs = [interval[0] for interval in intervals]
    data = [_mean_or_nan(values) for values in datasingle]
    scoremeans = [_mean_or_nan(values) for values in scoresingle]

    fig, ax = plt.subplots(figsize=(16, 12))
    l1 = ax.plot(xs, data, label="average sentiment")
    ax2 = ax.twinx()
    l2 = ax2.plot(xs, scoremeans, label="average score (votes)", color="red")
    plt.grid(True)
    for i, mean in enumerate(data):
        if np.isnan(mean):
            # nothing was measured in this interval, so there is no point to label
            continue
        ax.text(intervals[i][0], mean, ("n=" if i == 0 else "") + str(len(datasingle[i])),
                ha="center", va=_label_valign(data, i))
    plt.title("Average sentiments for new users")
    plt.xticks(rotation=90)
    ax.set_xlabel("months")
    ax.set_ylabel("sentiment")
    # the score series lives on the secondary axis, so label that one
    ax2.set_ylabel("score (votes)")
    plt.legend(l1 + l2, [l.get_label() for l in l1 + l2], loc="upper right")
    outfile = os.path.join(outputdir, "average_sentiments-i" + str(intervl) + ".png")
    plt.savefig(outfile, bbox_inches='tight')
    plt.close(fig)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: votes.py <folder> [-i<N>] with N between 1 and 12.
    usage = sys.argv[0] + " <folder>"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    folder = sys.argv[1]
    if not os.path.isdir(folder):
        print(folder + " is not a folder")
        sys.exit(1)

    # Interval defaults to 1 unless a third argument overrides it.
    interval = 1
    option = sys.argv[2] if len(sys.argv) >= 3 else None
    if option is not None:
        # Only the "-i<N>" option is understood; reject anything else first.
        if not option.startswith("-i"):
            print("unknown parameter: " + option)
            sys.exit(1)
        try:
            interval = int(option[2:])
        except ValueError:
            print("-i: int required")
            sys.exit(1)
        if not 1 <= interval <= 12:
            print("-i: only 1 - 12")
            sys.exit(1)

    main(folder, interval)
|
||||
Reference in New Issue
Block a user