123 lines
4.1 KiB
Python
123 lines
4.1 KiB
Python
import os
|
|
import sys
|
|
|
|
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
|
|
|
from loader import load, dmt
|
|
from common import imprt
|
|
|
|
# Module-level VADER analyser instance; created once at import time and
# reused by computeToxLevel for every text that is scored.
analyser = SentimentIntensityAnalyzer()
|
|
|
|
|
|
def main(folder):
    """Score every answer of every post in *folder* and dump the results.

    The data set is read with ``load(folder)``.  For each post, the ``Body``
    of each entry in ``p['Answers']`` is passed to ``computeToxLevel`` and the
    result is stored as ``{post id: {answer id: scores}}``.  That mapping is
    then written to ``<folder>/output/sentiments.py`` (via ``dumptoxlevels``)
    and ``<folder>/output/sentiments.txt`` (via ``dumptoxlevelstxt``).

    Args:
        folder: Directory handed to ``load``; the ``output`` sub-directory is
            created inside it when it does not exist yet.

    Raises:
        OSError: If the output directory cannot be created or the output
            files cannot be written.
    """
    # load() returns four values; only the posts are used here.
    _users, posts, _firstcontrib, _sumcontrib = load(folder)

    outfolder = folder + "/output/"
    # Create the directory directly instead of shelling out to "mkdir -p":
    # a string-built shell command breaks on paths containing spaces or
    # shell metacharacters and requires a POSIX shell to be available.
    os.makedirs(outfolder, exist_ok=True)
    outfilename = outfolder + "sentiments"

    # compute toxic levels
    # NOTE(review): presumably dmt(posts, 10) spreads the map over 10 workers
    # and the string is a progress label -- confirm against loader.dmt.
    scored = dmt(posts, 10).map(
        lambda p: (p['Id'], {a['Id']: computeToxLevel(a['Body']) for a in p['Answers']}),
        "calculating sentiments",
    ).getresults()
    toxlevels = {post_id: answers for (post_id, answers) in scored}

    dumptoxlevels(toxlevels, outfilename + ".py")
    dumptoxlevelstxt(toxlevels, outfilename + ".txt")
|
|
|
|
# (lvl2q, lvl2a) = readtoxleveltxt(outfilename + ".txt")
|
|
#
|
|
# s1 = str(toxlevels)
|
|
# s2 = str(lvl2q)
|
|
# # print("s1: " + s1)
|
|
# # print("s2: " + s2)
|
|
# if s1 != s2:
|
|
# print("not equal")
|
|
# else:
|
|
# print("equal")
|
|
#
|
|
# # print("s1: " + str(imprt(folder + "/output/sentiments.py").answers))
|
|
# # print("s2: " + str(lvl2a))
|
|
# if str(imprt(folder + "/output/sentiments.py").answers) != str(lvl2a):
|
|
# print("a not equal")
|
|
# else:
|
|
# print("a equal")
|
|
#
|
|
# if str(imprt(folder + "/output/sentiments.py").posts) != str(lvl2q):
|
|
# print("q not equal")
|
|
# else:
|
|
# print("q equal")
|
|
|
|
|
|
def computeToxLevel(text):
    """Return the sentiment scores of *text* from the shared ``analyser``.

    The value is exactly what ``analyser.polarity_scores(text)`` returns; the
    dump helpers in this file read its ``'neg'``, ``'neu'``, ``'pos'`` and
    ``'compound'`` entries.
    """
    scores = analyser.polarity_scores(text)
    return scores
|
|
|
|
|
|
def dumptoxlevels(lvls, filename):
    """Write *lvls* to *filename* as two Python assignments.

    The file receives a ``posts = ...`` line holding ``str(lvls)`` and an
    ``answers = ...`` line holding the same scores flattened to a single
    ``{answer id: scores}`` dict.

    Args:
        lvls: Mapping ``{post id: {answer id: scores}}``.
        filename: Path of the file to (over)write.
    """
    # Flatten the per-post dicts; if an answer id occurs under several posts
    # the last one seen wins.
    flat_answers = {
        answer_id: scores
        for per_post in lvls.values()
        for answer_id, scores in per_post.items()
    }
    with open(filename, "w") as out:
        for label, payload in (("posts = ", lvls), ("answers = ", flat_answers)):
            out.write(label + str(payload) + "\n")
|
|
|
|
|
|
def dumptoxlevelstxt(lvls, filename):
    """Write *lvls* to *filename* in the compact text format.

    Two lines are written:

    * ``posts=`` followed by one record per post, joined with ``;;``.  Each
      record is ``<post id>:`` plus that post's answer records joined with
      ``;``.
    * ``answers=`` followed by every answer record joined with ``;``.

    An answer record is ``<answer id>:<neg>:<neu>:<pos>:<compound>``.

    Args:
        lvls: Mapping ``{post id: {answer id: scores}}`` where each scores
            dict has the keys ``'neg'``, ``'neu'``, ``'pos'``, ``'compound'``.
        filename: Path of the file to (over)write.
    """
    def encode(answer_id, scores):
        # One colon-separated answer record.
        fields = (answer_id, scores['neg'], scores['neu'], scores['pos'], scores['compound'])
        return ":".join(str(field) for field in fields)

    post_records = []
    flat_answers = {}
    for post_id, per_post in lvls.items():
        encoded = [encode(answer_id, scores) for answer_id, scores in per_post.items()]
        post_records.append(str(post_id) + ":" + ";".join(encoded))
        # Later posts overwrite the scores of an answer id that was seen before.
        flat_answers.update(per_post)

    answer_records = [encode(answer_id, scores) for answer_id, scores in flat_answers.items()]

    with open(filename, "w") as out:
        out.write("posts=" + ";;".join(post_records) + "\n")
        out.write("answers=" + ";".join(answer_records) + "\n")
|
|
|
|
|
|
def _parse_answer_record(record):
    """Split one ``id:neg:neu:pos:compound`` record into ``(id, scores)``."""
    answer_id, neg, neu, pos, compound = record.split(":")
    return answer_id, {
        "neg": float(neg),
        "neu": float(neu),
        "pos": float(pos),
        "compound": float(compound),
    }


def readtoxleveltxt(filename):
    """Read a file in the text format written by ``dumptoxlevelstxt``.

    The ``posts=`` line holds post records separated by ``;;``; each record
    is ``<post id>:`` followed by answer records separated by ``;``.  The
    ``answers=`` line holds answer records separated by ``;``.  An answer
    record is ``<answer id>:<neg>:<neu>:<pos>:<compound>``.

    Empty sections and empty records are skipped, so a file produced from an
    empty data set (``posts=`` / ``answers=`` with nothing after the prefix)
    yields empty dicts instead of raising.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(rq, ra)``: ``rq`` maps post id to ``{answer id: scores}``
        and ``ra`` maps answer id to scores.  Ids are returned as strings
        exactly as they appear in the file; scores are floats under the keys
        ``'neg'``, ``'neu'``, ``'pos'`` and ``'compound'``.  A section that
        is missing from the file yields an empty dict; if a section occurs
        more than once, the last occurrence wins.

    Raises:
        ValueError: If a non-empty answer record does not have exactly five
            colon-separated fields or a score is not a valid float.
    """
    rq = {}
    ra = {}
    with open(filename, 'r') as f:
        for raw_line in f:
            line = raw_line.rstrip("\n")
            if line.startswith("posts="):
                rq = {}
                for post_record in line[len("posts="):].split(";;"):
                    if not post_record:
                        # "".split(";;") yields [""], i.e. no posts at all.
                        continue
                    # Only the first ":" separates the post id from its
                    # answer records, which contain ":" themselves.
                    post_id, _, answer_part = post_record.partition(":")
                    rq[post_id] = dict(
                        _parse_answer_record(record)
                        for record in answer_part.split(";")
                        if record
                    )
            elif line.startswith("answers="):
                ra = dict(
                    _parse_answer_record(record)
                    for record in line[len("answers="):].split(";")
                    if record
                )

    return rq, ra
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: the single required argument is the data folder.
    usage = sys.argv[0] + " <folder>"
    if not sys.argv[1:]:
        # No folder given: show how to call the script and fail.
        print(usage)
        sys.exit(1)

    folder = sys.argv[1]
    if os.path.isdir(folder):
        main(folder)
    else:
        print(folder + " is not a folder")
        sys.exit(1)
|