wip
This commit is contained in:
@@ -7,34 +7,33 @@ from math import ceil
|
|||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
|
||||||
|
|
||||||
from common import calc_intervals, imprt, FigSaver
|
from common import calc_intervals, imprt, printnoln, rprint, DAYS_NEW_USER, IMAGE_MAGICK
|
||||||
from loader import load, dmt, cms
|
from loader import load, dmt, cms
|
||||||
|
|
||||||
printnoln = lambda text: print(text, end='', flush=True)
|
|
||||||
rprint = lambda text: print('\r' + text)
|
|
||||||
|
|
||||||
DAYS_NEW_USER = 7
|
|
||||||
OLD_USER_YEAR = 3
|
|
||||||
OLD_USER_PERCENTILE = 0.95
|
OLD_USER_PERCENTILE = 0.95
|
||||||
|
|
||||||
analyser = SentimentIntensityAnalyzer()
|
|
||||||
figsaver = FigSaver()
|
|
||||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||||
|
|
||||||
|
|
||||||
def main(folder):
|
def main(folder, intervl):
|
||||||
users, posts, firstcontrib, sumcontrib = load(folder)
|
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||||
|
|
||||||
intervals = calc_intervals(posts)
|
intervals = calc_intervals(posts, intervl)
|
||||||
cachedsentiments = imprt(folder + "/output/sentiments.py").answers
|
cachedsentiments = imprt(folder + "/output/sentiments.py").answers
|
||||||
|
|
||||||
outfolder = folder + "/output/batch/"
|
outputdir = folder + "/output/batch/"
|
||||||
os.system("mkdir -p " + outfolder)
|
os.system("mkdir -p " + outputdir)
|
||||||
|
|
||||||
postcounts = range(1, 5 + 1)
|
postcounts = range(1, 5 + 1)
|
||||||
|
|
||||||
|
magickpost = {i: IMAGE_MAGICK for i in postcounts}
|
||||||
|
magickold = IMAGE_MAGICK
|
||||||
|
magickglobal = IMAGE_MAGICK
|
||||||
|
|
||||||
for (option_date_from, option_date_to) in intervals:
|
for (option_date_from, option_date_to) in intervals:
|
||||||
|
magickdate = IMAGE_MAGICK
|
||||||
|
|
||||||
# get questions for option_date_from <= creation date < option_date_to
|
# get questions for option_date_from <= creation date < option_date_to
|
||||||
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filter posts by dates").getresults()
|
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filter posts by dates").getresults()
|
||||||
if len(newposts) == 0:
|
if len(newposts) == 0:
|
||||||
@@ -51,8 +50,8 @@ def main(folder):
|
|||||||
gpos = []
|
gpos = []
|
||||||
gcom = []
|
gcom = []
|
||||||
|
|
||||||
goutfilenamenewusers = outfolder + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
goutfilenamenewusers = outputdir + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||||
goutfilenameoldusers = outfolder + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
goutfilenameoldusers = outputdir + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||||
|
|
||||||
for option_posts in postcounts:
|
for option_posts in postcounts:
|
||||||
# print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + " - #posts: " + str(option_posts))
|
# print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + " - #posts: " + str(option_posts))
|
||||||
@@ -122,6 +121,9 @@ def main(folder):
|
|||||||
fig.savefig(outfilename + ".png", bbox_inches='tight')
|
fig.savefig(outfilename + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
||||||
|
magickpost[option_posts] += " " + outfilename + ".png"
|
||||||
|
magickdate += " " + outfilename + ".png"
|
||||||
|
os.system(magickdate + " " + goutfilenamenewusers + ".pdf")
|
||||||
|
|
||||||
# global
|
# global
|
||||||
start = cms()
|
start = cms()
|
||||||
@@ -146,6 +148,7 @@ def main(folder):
|
|||||||
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
|
gfig.savefig(goutfilenamenewusers + ".png", bbox_inches='tight')
|
||||||
plt.close(gfig)
|
plt.close(gfig)
|
||||||
rprint("global plot post ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
rprint("global plot post ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
||||||
|
magickglobal += " " + goutfilenamenewusers + ".png"
|
||||||
|
|
||||||
# for old users ---------------------------------------------------------------------------------
|
# for old users ---------------------------------------------------------------------------------
|
||||||
start = cms()
|
start = cms()
|
||||||
@@ -192,18 +195,16 @@ def main(folder):
|
|||||||
|
|
||||||
# plt.show()
|
# plt.show()
|
||||||
fig.suptitle("Sentiment of answers to posts by most posting users (95%tile)\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
fig.suptitle("Sentiment of answers to posts by most posting users (95%tile)\nPosts created between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
|
||||||
# figsaver.save(fig, goutfilenameoldusers + ".png", bbox_inches='tight')
|
|
||||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ...")
|
||||||
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
|
fig.savefig(goutfilenameoldusers + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
||||||
|
magickold += " " + goutfilenameoldusers + ".png"
|
||||||
|
|
||||||
figsaver.join()
|
os.system(magickglobal + " batch_newusers.pdf")
|
||||||
figsaver.join()
|
os.system(magickold + " batch_oldusers.pdf")
|
||||||
|
for (i, cmd) in magickpost.items():
|
||||||
|
os.system(cmd + " " + "batch_newusers_" + i + ".pdf")
|
||||||
def computeToxLevel(text):
|
|
||||||
return analyser.polarity_scores(text)
|
|
||||||
|
|
||||||
|
|
||||||
def dumptoxlevels(lvls, filename):
|
def dumptoxlevels(lvls, filename):
|
||||||
@@ -222,5 +223,20 @@ if __name__ == "__main__":
|
|||||||
if not os.path.isdir(folder):
|
if not os.path.isdir(folder):
|
||||||
print(folder + " is not a folder")
|
print(folder + " is not a folder")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
interval = 3
|
||||||
|
if len(sys.argv) >= 3:
|
||||||
|
if sys.argv[2].startswith("-i"):
|
||||||
|
interval = sys.argv[2][2:]
|
||||||
|
try:
|
||||||
|
interval = int(interval)
|
||||||
|
except ValueError:
|
||||||
|
print("-i: int required")
|
||||||
|
sys.exit(1)
|
||||||
|
if interval < 1 or interval > 12:
|
||||||
|
print("-i: only 1 - 12")
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
print("unknown parameter: " + sys.argv[2])
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
main(folder)
|
main(folder, interval)
|
||||||
|
|||||||
@@ -9,10 +9,9 @@ import matplotlib.pyplot as plt
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.stats import ks_2samp
|
from scipy.stats import ks_2samp
|
||||||
|
|
||||||
from common import imprt
|
from common import imprt, IMAGE_MAGICK
|
||||||
|
|
||||||
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
|
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
|
||||||
IMAGE_MAGICK = "magick"
|
|
||||||
|
|
||||||
|
|
||||||
def main(folder):
|
def main(folder):
|
||||||
|
|||||||
25
common.py
25
common.py
@@ -5,29 +5,36 @@ import matplotlib.pyplot as plt
|
|||||||
|
|
||||||
from loader import dmt
|
from loader import dmt
|
||||||
|
|
||||||
|
printnoln = lambda text: print(text, end='', flush=True)
|
||||||
|
rprint = lambda text: print('\r' + text)
|
||||||
|
|
||||||
def calc_intervals(posts):
|
DAYS_NEW_USER = 7
|
||||||
|
IMAGE_MAGICK = "magick"
|
||||||
|
|
||||||
|
|
||||||
|
def calc_intervals(posts, months=3):
|
||||||
firstpost = dmt(posts).reduce(lambda acc, e: acc if acc < e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc < e else e, lambda: posts[0]['CreationDate'],
|
firstpost = dmt(posts).reduce(lambda acc, e: acc if acc < e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc < e else e, lambda: posts[0]['CreationDate'],
|
||||||
"firstpost").getresults()
|
"firstpost").getresults()
|
||||||
lastpost = dmt(posts).reduce(lambda acc, e: acc if acc > e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc > e else e, lambda: posts[0]['CreationDate'], "lastpost").getresults()
|
lastpost = dmt(posts).reduce(lambda acc, e: acc if acc > e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc > e else e, lambda: posts[0]['CreationDate'], "lastpost").getresults()
|
||||||
|
|
||||||
# calc quarter beginning
|
# calc quarter beginning
|
||||||
firstpost = firstpost.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
firstpost = firstpost.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
||||||
if firstpost.month not in (1, 4, 7, 10):
|
if (firstpost.month - 1) % months != 0:
|
||||||
firstpost = firstpost.replace(month={1: 1, 2: 1, 3: 1, 4: 4, 5: 4, 6: 4, 7: 7, 8: 7, 9: 7, 10: 10, 11: 10, 12: 10}[firstpost.month])
|
firstpost = firstpost.replace(month=firstpost.month - ((firstpost.month - 1) % months))
|
||||||
lastpost = lastpost.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
lastpost = lastpost.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
||||||
if lastpost.month not in (1, 4, 7, 10):
|
if (lastpost.month - 1) % months != 0:
|
||||||
lastpost = lastpost.replace(month={1: 1, 2: 1, 3: 1, 4: 4, 5: 4, 6: 4, 7: 7, 8: 7, 9: 7, 10: 10, 11: 10, 12: 10}[lastpost.month])
|
lastpost = lastpost.replace(month=lastpost.month - ((lastpost.month - 1) % months))
|
||||||
# add 3 months to last post
|
# add 3 months to last post
|
||||||
if lastpost.month == 10:
|
if lastpost.month + months > 12:
|
||||||
lastpost = lastpost.replace(month=1, year=lastpost.year + 1)
|
lastpost = lastpost.replace(month=lastpost.month + months - 12, year=lastpost.year + 1)
|
||||||
else:
|
else:
|
||||||
lastpost = lastpost.replace(month=lastpost.month + 3)
|
lastpost = lastpost.replace(month=lastpost.month + months)
|
||||||
|
|
||||||
cdate = firstpost
|
cdate = firstpost
|
||||||
intervals = []
|
intervals = []
|
||||||
while cdate < lastpost:
|
while cdate < lastpost:
|
||||||
nextquarter = cdate.replace(month=(cdate.month + 3) % 12, year=cdate.year + (0 if cdate.month + 3 < 12 else 1))
|
nextmon = cdate.month + months
|
||||||
|
nextquarter = cdate.replace(month=nextmon if nextmon <=12 else nextmon-12, year=cdate.year + (0 if nextmon <= 12 else 1))
|
||||||
print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
|
print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
|
||||||
intervals.append((cdate, nextquarter))
|
intervals.append((cdate, nextquarter))
|
||||||
cdate = nextquarter
|
cdate = nextquarter
|
||||||
|
|||||||
75
posthist.py
75
posthist.py
@@ -1,34 +1,29 @@
|
|||||||
from datetime import datetime
|
|
||||||
from datetime import timedelta
|
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
import sys
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from loader import load, dmt, cms
|
|
||||||
import math
|
|
||||||
from common import calc_intervals
|
|
||||||
|
|
||||||
printnoln = lambda text: print(text, end='', flush=True)
|
import matplotlib.pyplot as plt
|
||||||
rprint = lambda text: print('\r' + text)
|
from matplotlib.ticker import MaxNLocator
|
||||||
|
|
||||||
DAYS_NEW_USER = 7
|
from common import calc_intervals, IMAGE_MAGICK
|
||||||
OLD_USER_YEAR = 3
|
from loader import load, dmt
|
||||||
|
|
||||||
analyser = SentimentIntensityAnalyzer()
|
|
||||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
||||||
|
|
||||||
|
|
||||||
def main(folder):
|
def main(folder, intervl):
|
||||||
users, posts, firstcontrib, sumcontrib = load(folder)
|
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||||
intervals = calc_intervals(posts)
|
intervals = calc_intervals(posts, intervl)
|
||||||
|
|
||||||
|
outputdir = folder + "/output/posthist/"
|
||||||
|
os.system("mkdir -p " + outputdir)
|
||||||
|
|
||||||
|
activeusercounts = []
|
||||||
|
imgmagickcmd = IMAGE_MAGICK
|
||||||
for (option_date_from, option_date_to) in intervals:
|
for (option_date_from, option_date_to) in intervals:
|
||||||
print((option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y")))
|
print((option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y")))
|
||||||
|
|
||||||
# filter posts by option_date_from <= creation date <= option_date_to
|
# filter posts by option_date_from <= creation date <= option_date_to
|
||||||
# newusers = set(dmt(users).filter(lambda u: option_date_from <= u['CreationDate'] < option_date_to, "filtering users by creation").map(lambda u: u['Id'], "getting user ids").getresults())
|
|
||||||
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
|
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
|
||||||
|
|
||||||
postcounts = defaultdict(list)
|
postcounts = defaultdict(list)
|
||||||
@@ -37,10 +32,8 @@ def main(folder):
|
|||||||
postcounts[p['OwnerUserId']].append(p)
|
postcounts[p['OwnerUserId']].append(p)
|
||||||
i = i + 1
|
i = i + 1
|
||||||
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
|
postcounts = {id: len(pc) for (id, pc) in postcounts.items()}
|
||||||
# print("i: " + str(i) + " expected: " + str(len(newposts)) + " is: " + str(sum([pc for pc in postcounts.values()])))
|
activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys())))
|
||||||
|
|
||||||
outputdir = folder + "/output/posthist/"
|
|
||||||
os.system("mkdir -p " + outputdir)
|
|
||||||
histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||||
|
|
||||||
histdata = [pc for pc in postcounts.values()]
|
histdata = [pc for pc in postcounts.values()]
|
||||||
@@ -48,23 +41,22 @@ def main(folder):
|
|||||||
plt.hist(histdata, range(max(histdata, default=0) + 1))
|
plt.hist(histdata, range(max(histdata, default=0) + 1))
|
||||||
plt.yscale('log')
|
plt.yscale('log')
|
||||||
plt.ylim(bottom=0)
|
plt.ylim(bottom=0)
|
||||||
|
plt.xlabel("#posts")
|
||||||
|
plt.ylabel("#users with X posts")
|
||||||
|
fig.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
|
||||||
plt.title("Histogram for user post count registered between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y"))
|
plt.title("Histogram for user post count registered between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y"))
|
||||||
fig.savefig(histfilename + ".png", bbox_inches='tight')
|
fig.savefig(histfilename + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
imgmagickcmd += " " + histfilename + ".png"
|
||||||
|
os.system(imgmagickcmd + " " + outputdir + "/posthist.pdf")
|
||||||
|
|
||||||
|
fig = plt.figure(figsize=(16, 12))
|
||||||
def computeToxLevel(text):
|
plt.plot([x[0] for (x, y) in activeusercounts], [y for (x, y) in activeusercounts])
|
||||||
return analyser.polarity_scores(text)
|
plt.yscale('log')
|
||||||
|
plt.ylim(bottom=0)
|
||||||
|
plt.title("Active users")
|
||||||
def flatmap(arr):
|
fig.savefig(outputdir + "activeusers.png", bbox_inches='tight')
|
||||||
return [item for sublist in arr for item in sublist]
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
def dumptoxlevels(lvls, filename):
|
|
||||||
with open(filename, "w") as file:
|
|
||||||
file.write("from collections import defaultdict\n\n")
|
|
||||||
file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@@ -77,5 +69,20 @@ if __name__ == "__main__":
|
|||||||
if not os.path.isdir(folder):
|
if not os.path.isdir(folder):
|
||||||
print(folder + " is not a folder")
|
print(folder + " is not a folder")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
interval = 3
|
||||||
|
if len(sys.argv) >= 3:
|
||||||
|
if sys.argv[2].startswith("-i"):
|
||||||
|
interval = sys.argv[2][2:]
|
||||||
|
try:
|
||||||
|
interval = int(interval)
|
||||||
|
except ValueError:
|
||||||
|
print("-i: int required")
|
||||||
|
sys.exit(1)
|
||||||
|
if interval < 1 or interval > 12:
|
||||||
|
print("-i: only 1 - 12")
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
print("unknown parameter: " + sys.argv[2])
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
main(folder)
|
main(folder, interval)
|
||||||
|
|||||||
@@ -5,14 +5,7 @@ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
|||||||
|
|
||||||
from loader import load, dmt
|
from loader import load, dmt
|
||||||
|
|
||||||
printnoln = lambda text: print(text, end='', flush=True)
|
|
||||||
rprint = lambda text: print('\r' + text)
|
|
||||||
|
|
||||||
DAYS_NEW_USER = 7
|
|
||||||
OLD_USER_YEAR = 3
|
|
||||||
|
|
||||||
analyser = SentimentIntensityAnalyzer()
|
analyser = SentimentIntensityAnalyzer()
|
||||||
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
|
|
||||||
|
|
||||||
|
|
||||||
def main(folder):
|
def main(folder):
|
||||||
@@ -22,20 +15,7 @@ def main(folder):
|
|||||||
os.system("mkdir -p " + outfolder)
|
os.system("mkdir -p " + outfolder)
|
||||||
outfilename = outfolder + "sentiments"
|
outfilename = outfolder + "sentiments"
|
||||||
|
|
||||||
# computer toxic levels
|
# compute toxic levels
|
||||||
# start = cms()
|
|
||||||
# printnoln("computing toxic levels: filtering")
|
|
||||||
|
|
||||||
# toxlevels = defaultdict(list)
|
|
||||||
# for (i, post) in enumerate(posts):
|
|
||||||
# if (i + 1) % 100 == 0:
|
|
||||||
# printnoln("\rcomputing toxic levels: post #" + str(i + 1) + "/" + str(len(posts)))
|
|
||||||
# if (i + 1) == len(posts):
|
|
||||||
# printnoln("\rcomputing toxic levels: post #" + str(i + 1) + "/" + str(len(posts)))
|
|
||||||
# for a in post['Answers']:
|
|
||||||
# toxlevel = computeToxLevel(a['Body'])
|
|
||||||
# toxlevels[post['Id']].append(toxlevel)
|
|
||||||
# rprint("computing toxic levels: post #" + str(len(posts)) + "/" + str(len(posts)) + " ... took " + str(cms() - start) + "ms")
|
|
||||||
toxlevels = dmt(posts, 10).map(lambda p: (p['Id'], {a['Id']: computeToxLevel(a['Body']) for a in p['Answers']}), "calculating sentiments").getresults()
|
toxlevels = dmt(posts, 10).map(lambda p: (p['Id'], {a['Id']: computeToxLevel(a['Body']) for a in p['Answers']}), "calculating sentiments").getresults()
|
||||||
toxlevels = {id: p for (id, p) in toxlevels}
|
toxlevels = {id: p for (id, p) in toxlevels}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user