wip
This commit is contained in:
@@ -72,7 +72,7 @@ def main(folder, intervl):
|
|||||||
userid = post['OwnerUserId']
|
userid = post['OwnerUserId']
|
||||||
|
|
||||||
# check first contribution
|
# check first contribution
|
||||||
if firstcontrib[userid] + timedelta(days=DAYS_NEW_USER) < post['CreationDate']:
|
if firstcontrib[userid] + timedelta(days=DAYS_NEW_USER) <= post['CreationDate']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
sortedposts[userid].append(post)
|
sortedposts[userid].append(post)
|
||||||
@@ -90,11 +90,11 @@ def main(folder, intervl):
|
|||||||
for (i, post) in enumerate(filteredposts):
|
for (i, post) in enumerate(filteredposts):
|
||||||
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
|
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
|
||||||
for a in post['Answers']:
|
for a in post['Answers']:
|
||||||
if a['Id'] in cachedsentiments.keys():
|
# if a['Id'] in cachedsentiments.keys():
|
||||||
toxlevel = cachedsentiments[a['Id']]
|
toxlevel = cachedsentiments[a['Id']]
|
||||||
else:
|
# else:
|
||||||
print("Sentiment not found for " + a['Id'])
|
# print("Sentiment not found for " + a['Id'])
|
||||||
continue
|
# continue
|
||||||
toxlevels.append(toxlevel)
|
toxlevels.append(toxlevel)
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
||||||
|
|
||||||
@@ -178,10 +178,10 @@ def main(folder, intervl):
|
|||||||
for (i, post) in enumerate(filteredposts):
|
for (i, post) in enumerate(filteredposts):
|
||||||
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
|
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
|
||||||
for a in post['Answers']:
|
for a in post['Answers']:
|
||||||
if a['Id'] in cachedsentiments.keys():
|
# if a['Id'] in cachedsentiments.keys():
|
||||||
toxlevel = cachedsentiments[a['Id']]
|
toxlevel = cachedsentiments[a['Id']]
|
||||||
else:
|
# else:
|
||||||
print("Sentiment not found for " + a['Id'])
|
# print("Sentiment not found for " + a['Id'])
|
||||||
toxlevels.append(toxlevel)
|
toxlevels.append(toxlevel)
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
||||||
avgsent[0].append(np.mean([s['compound'] for s in toxlevels]) if len(toxlevels) == 0 else 0)
|
avgsent[0].append(np.mean([s['compound'] for s in toxlevels]) if len(toxlevels) == 0 else 0)
|
||||||
|
|||||||
@@ -59,8 +59,8 @@ def g(srcfile, outputdir, intervals):
|
|||||||
plt.ylabel("pvalue")
|
plt.ylabel("pvalue")
|
||||||
plt.legend(loc="upper right")
|
plt.legend(loc="upper right")
|
||||||
plt.savefig(outputdir + "/ks_averagesentiments_pval.png", bbox_inches='tight')
|
plt.savefig(outputdir + "/ks_averagesentiments_pval.png", bbox_inches='tight')
|
||||||
|
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
fig = plt.figure(figsize=(16, 12))
|
fig = plt.figure(figsize=(16, 12))
|
||||||
for i in range(len(single)):
|
for i in range(len(single)):
|
||||||
plt.plot([iv[0] for iv in intervals], [s if isinstance(s, float) else s.statistic for s in single[i]], label=str(i + 1) + " posts - most posters")
|
plt.plot([iv[0] for iv in intervals], [s if isinstance(s, float) else s.statistic for s in single[i]], label=str(i + 1) + " posts - most posters")
|
||||||
|
|||||||
4
its.py
4
its.py
@@ -42,8 +42,8 @@ def main(folder, intervl):
|
|||||||
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
||||||
# avg sentiments
|
# avg sentiments
|
||||||
filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
|
filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
|
||||||
for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to
|
for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to
|
||||||
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= a['CreationDate']])
|
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > a['CreationDate']])
|
||||||
.filter(lambda p: p != [])
|
.filter(lambda p: p != [])
|
||||||
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
|
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
|
||||||
.getresults())
|
.getresults())
|
||||||
|
|||||||
28
loader.py
28
loader.py
@@ -72,15 +72,21 @@ def readVotes(folder):
|
|||||||
print(prefix + "done")
|
print(prefix + "done")
|
||||||
return votes
|
return votes
|
||||||
|
|
||||||
|
|
||||||
VOTE_TAGS = ['PostId', 'VoteTypeId', 'CreationDate']
|
VOTE_TAGS = ['PostId', 'VoteTypeId', 'CreationDate']
|
||||||
|
VOTE_DTAGS = ['CreationDate']
|
||||||
|
VOTE_ITAGS = ['PostId', 'VoteTypeId']
|
||||||
|
|
||||||
|
|
||||||
def mapvote(item):
|
def mapvote(item):
|
||||||
datetags = ['CreationDate']
|
|
||||||
vote = {tag: getTag(item, tag) for tag in VOTE_TAGS}
|
vote = {tag: getTag(item, tag) for tag in VOTE_TAGS}
|
||||||
for tag in datetags:
|
for tag in VOTE_DTAGS:
|
||||||
if vote[tag] is not None:
|
if vote[tag] is not None:
|
||||||
vote[tag] = datetime.fromisoformat(vote[tag])
|
vote[tag] = datetime.fromisoformat(vote[tag])
|
||||||
else:
|
else:
|
||||||
print("map vote: tag " + tag + " is None: " + str(vote))
|
print("map vote: tag " + tag + " is None: " + str(vote))
|
||||||
|
for tag in VOTE_ITAGS:
|
||||||
|
vote[tag] = int(vote[tag])
|
||||||
return vote
|
return vote
|
||||||
|
|
||||||
|
|
||||||
@@ -112,8 +118,12 @@ def computefirstcontrib(posts):
|
|||||||
firstcontrib = {id: min(ldate) for (id, ldate) in firstcontrib.items()}
|
firstcontrib = {id: min(ldate) for (id, ldate) in firstcontrib.items()}
|
||||||
return firstcontrib
|
return firstcontrib
|
||||||
|
|
||||||
|
|
||||||
USER_TAGS = ['Id', 'CreationDate']
|
USER_TAGS = ['Id', 'CreationDate']
|
||||||
USER_DTAGS = ['CreationDate']
|
USER_DTAGS = ['CreationDate']
|
||||||
|
USER_ITAGS = ['Id']
|
||||||
|
|
||||||
|
|
||||||
def mapuser(item):
|
def mapuser(item):
|
||||||
user = {tag: getTag(item, tag) for tag in USER_TAGS}
|
user = {tag: getTag(item, tag) for tag in USER_TAGS}
|
||||||
for tag in USER_DTAGS:
|
for tag in USER_DTAGS:
|
||||||
@@ -121,24 +131,38 @@ def mapuser(item):
|
|||||||
user[tag] = datetime.fromisoformat(user[tag])
|
user[tag] = datetime.fromisoformat(user[tag])
|
||||||
else:
|
else:
|
||||||
print("map user: tag " + tag + " is None: " + str(user))
|
print("map user: tag " + tag + " is None: " + str(user))
|
||||||
|
for tag in USER_ITAGS:
|
||||||
|
user[tag] = int(user[tag])
|
||||||
return user
|
return user
|
||||||
|
|
||||||
|
|
||||||
Q_TAGS = ['Id', 'CreationDate', 'Body', 'Title', 'OwnerUserId', 'OwnerDisplayName', 'Score']
|
Q_TAGS = ['Id', 'CreationDate', 'Body', 'Title', 'OwnerUserId', 'OwnerDisplayName', 'Score']
|
||||||
Q_DTAGS = ['CreationDate']
|
Q_DTAGS = ['CreationDate']
|
||||||
|
Q_ITAGS = ['Id', 'OwnerUserId', 'Score']
|
||||||
Q_BODY = 'Body'
|
Q_BODY = 'Body'
|
||||||
|
|
||||||
|
|
||||||
def mapQuestion(item):
|
def mapQuestion(item):
|
||||||
question = {tag: getTag(item, tag) for tag in Q_TAGS}
|
question = {tag: getTag(item, tag) for tag in Q_TAGS}
|
||||||
for tag in Q_DTAGS:
|
for tag in Q_DTAGS:
|
||||||
question[tag] = datetime.fromisoformat(question[tag])
|
question[tag] = datetime.fromisoformat(question[tag])
|
||||||
|
for tag in Q_ITAGS:
|
||||||
|
question[tag] = int(question[tag]) if question[tag] is not None else None
|
||||||
question[Q_BODY] = removetags(html.unescape(question[Q_BODY]))
|
question[Q_BODY] = removetags(html.unescape(question[Q_BODY]))
|
||||||
return question
|
return question
|
||||||
|
|
||||||
|
|
||||||
A_TAGS = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId', 'Score']
|
A_TAGS = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId', 'Score']
|
||||||
A_DTAGS = ['CreationDate']
|
A_DTAGS = ['CreationDate']
|
||||||
|
A_ITAGS = ['Id', 'ParentId', 'OwnerUserId', 'Score']
|
||||||
|
|
||||||
|
|
||||||
def mapAnswer(item):
|
def mapAnswer(item):
|
||||||
answer = {tag: getTag(item, tag) for tag in A_TAGS}
|
answer = {tag: getTag(item, tag) for tag in A_TAGS}
|
||||||
for tag in A_DTAGS:
|
for tag in A_DTAGS:
|
||||||
answer[tag] = datetime.fromisoformat(answer[tag])
|
answer[tag] = datetime.fromisoformat(answer[tag])
|
||||||
|
for tag in A_ITAGS:
|
||||||
|
answer[tag] = int(answer[tag]) if answer[tag] is not None else None
|
||||||
answer['Body'] = removetags(html.unescape(answer['Body']))
|
answer['Body'] = removetags(html.unescape(answer['Body']))
|
||||||
return answer
|
return answer
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ def main(folder, intervl):
|
|||||||
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
|
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
|
||||||
activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys())))
|
activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys())))
|
||||||
|
|
||||||
activitynewusersinmonth = defaultdict(int)
|
activitynewusersinmonth = defaultdict(int) # TODO match month exactly
|
||||||
for p in newposts:
|
for p in newposts:
|
||||||
if firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate']:
|
if firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) > p['CreationDate']:
|
||||||
activitynewusersinmonth[p['OwnerUserId']] += 1
|
activitynewusersinmonth[p['OwnerUserId']] += 1
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ def readtoxleveltxt(filename):
|
|||||||
ra = line.split(";")
|
ra = line.split(";")
|
||||||
ra = [l.split(":") for l in ra]
|
ra = [l.split(":") for l in ra]
|
||||||
# print("i1: " + str(ra[0:5]))
|
# print("i1: " + str(ra[0:5]))
|
||||||
ra = {id: {"neg": float(neg), "neu": float(neu), "pos": float(pos), "compound": float(compound)} for [id, neg, neu, pos, compound] in ra}
|
ra = {int(id): {"neg": float(neg), "neu": float(neu), "pos": float(pos), "compound": float(compound)} for [id, neg, neu, pos, compound] in ra}
|
||||||
# print("i1: " + str(ra)[0:500])
|
# print("i1: " + str(ra)[0:500])
|
||||||
|
|
||||||
return rq, ra
|
return rq, ra
|
||||||
|
|||||||
12
votes.py
12
votes.py
@@ -36,12 +36,12 @@ def main(folder, intervl):
|
|||||||
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
||||||
# avg sentiments
|
# avg sentiments
|
||||||
scores = (dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to
|
scores = (dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to
|
||||||
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= p['CreationDate'])
|
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) >= p['CreationDate'])
|
||||||
.map(lambda p: int(p['Score']))
|
.map(lambda p: p['Score'])
|
||||||
.getresults())
|
.getresults())
|
||||||
filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
|
filtered = (dmt(posts).map(lambda p: [cachedsentiments[a['Id']]['compound']
|
||||||
for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to
|
for a in p['Answers'] if option_date_from <= p['CreationDate'] < option_date_to
|
||||||
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= a['CreationDate']])
|
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) >= a['CreationDate']])
|
||||||
.filter(lambda p: p != [])
|
.filter(lambda p: p != [])
|
||||||
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
|
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: [])
|
||||||
.getresults())
|
.getresults())
|
||||||
@@ -102,10 +102,10 @@ def main(folder, intervl):
|
|||||||
if option_date_to > interval[1]:
|
if option_date_to > interval[1]:
|
||||||
continue
|
continue
|
||||||
intervalposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to
|
intervalposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to
|
||||||
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) <= p['CreationDate']).getresults()
|
and firstcontrib[p['OwnerUserId']] + timedelta(days=DAYS_NEW_USER) >= p['CreationDate']).getresults()
|
||||||
intervalpostsids = set(dmt(intervalposts).map(lambda p: p['Id']).getresults())
|
intervalpostsids = set(dmt(intervalposts).map(lambda p: p['Id']).getresults())
|
||||||
intervalvotes = dmt(ivvotes).filter(lambda v: v['PostId'] in intervalpostsids).getresults()
|
intervalvotes = dmt(ivvotes).filter(lambda v: v['PostId'] in intervalpostsids).getresults()
|
||||||
intervalscore = sum(dmt(intervalvotes).map(lambda v: 1 if v['VoteTypeId'] == "2" else (-1 if v['VoteTypeId'] == "3" else 0)).getresults())
|
intervalscore = sum(dmt(intervalvotes).map(lambda v: 1 if v['VoteTypeId'] == 2 else (-1 if v['VoteTypeId'] == 3 else 0)).getresults())
|
||||||
intervalscore = intervalscore / len(intervalpostsids) if len(intervalpostsids) != 0 else float("nan")
|
intervalscore = intervalscore / len(intervalpostsids) if len(intervalpostsids) != 0 else float("nan")
|
||||||
scores.append(((option_date_from, option_date_to), intervalscore))
|
scores.append(((option_date_from, option_date_to), intervalscore))
|
||||||
# if all(str(score) == "nan" for iv, score in scores)
|
# if all(str(score) == "nan" for iv, score in scores)
|
||||||
|
|||||||
Reference in New Issue
Block a user