wip
This commit is contained in:
27
loader.py
27
loader.py
@@ -58,6 +58,33 @@ def load(folder):
|
|||||||
return users, posts, firstcontrib, sumcontrib
|
return users, posts, firstcontrib, sumcontrib
|
||||||
|
|
||||||
|
|
||||||
|
def readVotes(folder):
    """Load the votes stored in ``<folder>/Votes.xml``.

    Every XML element whose tag is ``row`` is collected and then converted
    with ``mapvote`` through the ``dmt`` helper. Status messages are emitted
    along the way via ``printnoln``, ``rprint`` and ``print``.

    :param folder: directory that contains ``Votes.xml``
    :return: whatever ``dmt(...).map(...).getresults()`` yields for the rows
             (presumably a list of vote dicts -- verify against ``dmt``)
    """
    prefix = "readVotes: "
    path = folder + "/Votes.xml"

    printnoln(prefix + "reading xml file ...")

    # NOTE(review): cms() looks like a millisecond clock, given the "ms"
    # suffix in the message below -- confirm against its definition.
    started = cms()
    rows = []
    for event, elem in et.iterparse(path):
        if elem.tag == "row":
            rows.append(elem)
    elapsed = cms() - started
    rprint(prefix + "reading xml file ... took " + str(elapsed) + "ms")

    mapped = dmt(rows).map(mapvote, prefix + "mapping votes")
    votes = mapped.getresults()

    print(prefix + "done")
    return votes
|
||||||
|
|
||||||
|
|
||||||
|
def mapvote(item):
    """Build a vote dict from a single vote element.

    The dict holds the keys ``PostId``, ``VoteTypeId`` and ``CreationDate``,
    each looked up with ``getTag``. A ``CreationDate`` that is not ``None``
    is parsed with ``datetime.fromisoformat``; a ``None`` value is reported
    with ``print`` and left unchanged.

    :param item: the element handed to ``getTag``
    :return: the vote dict
    """
    vote = {}
    for tag in ('PostId', 'VoteTypeId', 'CreationDate'):
        vote[tag] = getTag(item, tag)

    # Only the date-valued tags are converted from their string form.
    for tag in ('CreationDate',):
        raw = vote[tag]
        if raw is None:
            print("map vote: tag " + tag + " is None: " + str(vote))
            continue
        vote[tag] = datetime.fromisoformat(raw)

    return vote
|
||||||
|
|
||||||
|
|
||||||
def computesumcontrib(posts):
|
def computesumcontrib(posts):
|
||||||
x1 = dmt(posts).map(lambda q: q['OwnerUserId'], "calc sum contrib q").getresults()
|
x1 = dmt(posts).map(lambda q: q['OwnerUserId'], "calc sum contrib q").getresults()
|
||||||
x2 = dmt(posts).map(lambda q: [a['OwnerUserId'] for a in q['Answers']], "calc sum contrib a").getresults()
|
x2 = dmt(posts).map(lambda q: [a['OwnerUserId'] for a in q['Answers']], "calc sum contrib a").getresults()
|
||||||
|
|||||||
44
votes.py
44
votes.py
@@ -9,7 +9,7 @@ from datetime import timedelta
|
|||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
|
|
||||||
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
|
from common import calc_intervals, printnoln, rprint, DAYS_NEW_USER
|
||||||
from loader import load, dmt, cms
|
from loader import load, dmt, cms, readVotes
|
||||||
from sentiments import readtoxleveltxt
|
from sentiments import readtoxleveltxt
|
||||||
|
|
||||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||||
@@ -51,8 +51,8 @@ def main(folder, intervl):
|
|||||||
# filter nan entries
|
# filter nan entries
|
||||||
for i in range(len(datasingle)):
|
for i in range(len(datasingle)):
|
||||||
if len(datasingle[i]) == 0:
|
if len(datasingle[i]) == 0:
|
||||||
datasingle = float("nan")
|
datasingle[i] = float("nan")
|
||||||
if len(datasingle[i]) == 0:
|
if len(scoresingle[i]) == 0:
|
||||||
scoresingle[i] = float("nan")
|
scoresingle[i] = float("nan")
|
||||||
|
|
||||||
print("Plotting ...")
|
print("Plotting ...")
|
||||||
@@ -79,14 +79,44 @@ def main(folder, intervl):
|
|||||||
va = "bottom"
|
va = "bottom"
|
||||||
else:
|
else:
|
||||||
va = "top"
|
va = "top"
|
||||||
ax.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + str(len(datasingle[i])), ha="center", va=va)
|
ax.text(intervals[i][0], data[i], ("n=" if i == 0 else "") + (str(len(datasingle[i])) if str(datasingle[i]) != "nan" else ""), ha="center", va=va)
|
||||||
plt.title("Average sentiments for new users")
|
plt.title("Average sentiments and score for new users")
|
||||||
plt.xticks(rotation=90)
|
plt.xticks(rotation=90)
|
||||||
ax.set_xlabel("months")
|
ax.set_xlabel("months")
|
||||||
ax.set_ylabel("sentiment")
|
ax.set_ylabel("sentiment")
|
||||||
ax.set_ylabel("score (votes)")
|
ax2.set_ylabel("score (votes)")
|
||||||
plt.legend(l1 + l2, [l.get_label() for l in l1 + l2], loc="upper right")
|
plt.legend(l1 + l2, [l.get_label() for l in l1 + l2], loc="upper right")
|
||||||
outfile = outputdir + "/average_sentiments-i" + str(intervl) + ".png"
|
outfile = outputdir + "/average_votes-i" + str(intervl) + ".png"
|
||||||
|
plt.savefig(outfile, bbox_inches='tight')
|
||||||
|
plt.close(fig)
|
||||||
|
|
||||||
|
# votes over time
|
||||||
|
votes = readVotes(folder)
|
||||||
|
fig = plt.figure(figsize=(16, 12))
|
||||||
|
ivs = [(datetime.fromisoformat("2010-01-01T00:00:00"), datetime.fromisoformat(str(y) + "-01-01T00:00:00")) for y in range(2011, 2020)]
|
||||||
|
for interval in ivs:
|
||||||
|
print(interval[0].strftime("%d-%m-%Y") + " to " + interval[1].strftime("%d-%m-%Y"))
|
||||||
|
ivvotes = dmt(votes).filter(lambda v: interval[0] <= v['CreationDate'] < interval[1]).getresults()
|
||||||
|
scores = []
|
||||||
|
for (option_date_from, option_date_to) in intervals:
|
||||||
|
if option_date_to > interval[1]:
|
||||||
|
continue
|
||||||
|
intervalposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to
|
||||||
|
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= p['CreationDate']).getresults()
|
||||||
|
intervalpostsids = set(dmt(intervalposts).map(lambda p: p['Id']).getresults())
|
||||||
|
intervalvotes = dmt(ivvotes).filter(lambda v: v['PostId'] in intervalpostsids).getresults()
|
||||||
|
intervalscore = sum(dmt(intervalvotes).map(lambda v: 1 if v['VoteTypeId'] == "2" else (-1 if v['VoteTypeId'] == "3" else 0)).getresults())
|
||||||
|
intervalscore = intervalscore / len(intervalpostsids) if len(intervalpostsids) != 0 else float("nan")
|
||||||
|
scores.append(((option_date_from, option_date_to), intervalscore))
|
||||||
|
# if all(str(score) == "nan" for iv, score in scores)
|
||||||
|
# continue
|
||||||
|
plt.plot([iv[0] for iv, score in scores], [score for iv, score in scores], label=str(interval[0].year) + " - " + str(interval[1].year))
|
||||||
|
plt.title("Average score for new users over time")
|
||||||
|
plt.xlabel("months")
|
||||||
|
plt.ylabel("score")
|
||||||
|
plt.legend(loc="upper right")
|
||||||
|
plt.grid(True)
|
||||||
|
outfile = outputdir + "/average_votes_over_time-i" + str(intervl) + ".png"
|
||||||
plt.savefig(outfile, bbox_inches='tight')
|
plt.savefig(outfile, bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user