Files
master/sentiments.py
wea_ondara 2c1524a335 wip
2019-12-18 13:02:16 +01:00

123 lines
4.1 KiB
Python

import os
import sys
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from loader import load, dmt
from common import imprt
# Module-level VADER analyser, created once at import time and reused by
# computeToxLevel for every text that gets scored.
analyser = SentimentIntensityAnalyzer()
def main(folder):
    """Score every answer of every post and dump the results to disk.

    The data set is loaded from *folder* via ``load``. For each post, the
    ``Body`` of each entry in its ``Answers`` list is scored with
    ``computeToxLevel``. The result maps post id -> {answer id -> scores}
    and is written to ``<folder>/output/sentiments.py`` and
    ``<folder>/output/sentiments.txt``.

    :param folder: path of the data set directory; the ``output``
        sub-directory is created inside it if it does not exist yet.
    """
    # load() returns four values; only the posts are needed here.
    _, posts, _, _ = load(folder)

    outfolder = folder + "/output/"
    # Create the directory through the os module instead of shelling out to
    # "mkdir -p": the folder comes from the command line, so building a shell
    # string from it breaks on spaces/metacharacters and is not portable.
    os.makedirs(outfolder, exist_ok=True)
    outfilename = outfolder + "sentiments"

    # compute toxic levels
    results = dmt(posts, 10).map(
        lambda p: (p['Id'], {a['Id']: computeToxLevel(a['Body']) for a in p['Answers']}),
        "calculating sentiments",
    ).getresults()
    toxlevels = {post_id: scores for (post_id, scores) in results}

    dumptoxlevels(toxlevels, outfilename + ".py")
    dumptoxlevelstxt(toxlevels, outfilename + ".txt")

    # Disabled round-trip self-check of the text dump, kept for manual debugging:
    # (lvl2q, lvl2a) = readtoxleveltxt(outfilename + ".txt")
    #
    # s1 = str(toxlevels)
    # s2 = str(lvl2q)
    # # print("s1: " + s1)
    # # print("s2: " + s2)
    # if s1 != s2:
    #     print("not equal")
    # else:
    #     print("equal")
    #
    # # print("s1: " + str(imprt(folder + "/output/sentiments.py").answers))
    # # print("s2: " + str(lvl2a))
    # if str(imprt(folder + "/output/sentiments.py").answers) != str(lvl2a):
    #     print("a not equal")
    # else:
    #     print("a equal")
    #
    # if str(imprt(folder + "/output/sentiments.py").posts) != str(lvl2q):
    #     print("q not equal")
    # else:
    #     print("q equal")
def computeToxLevel(text):
    """Return the VADER polarity scores of *text*.

    The result is whatever the shared module-level ``analyser`` returns from
    ``polarity_scores``; the dump functions in this file read its ``neg``,
    ``neu``, ``pos`` and ``compound`` entries.
    """
    scores = analyser.polarity_scores(text)
    return scores
def dumptoxlevels(lvls, filename):
    """Write the scores to *filename* as two Python assignments.

    The file gets a ``posts = ...`` line holding *lvls* as-is
    (post id -> {answer id -> scores}) and an ``answers = ...`` line holding
    the same scores flattened to answer id -> scores.

    :param lvls: mapping of post id to a mapping of answer id to scores.
    :param filename: path of the file to (over)write.
    """
    # Flatten the per-post mappings; a later post overwrites an earlier one
    # if the same answer id shows up twice.
    flat = {
        answer_id: scores
        for per_post in lvls.values()
        for answer_id, scores in per_post.items()
    }

    with open(filename, "w") as out:
        out.writelines([
            "posts = " + str(lvls) + "\n",
            "answers = " + str(flat) + "\n",
        ])
def dumptoxlevelstxt(lvls, filename):
    """Write the scores to *filename* in a compact two-line text format.

    Line 1: ``posts=`` followed by one chunk per post, joined with ``;;``.
    Each chunk is ``<post id>:`` followed by that post's answers joined with
    ``;``, each answer encoded as ``<id>:<neg>:<neu>:<pos>:<compound>``.

    Line 2: ``answers=`` followed by all answers (flattened over all posts)
    in the same per-answer encoding, joined with ``;``.

    :param lvls: mapping of post id to a mapping of answer id to scores.
    :param filename: path of the file to (over)write.
    """
    def encode(entry_id, scores):
        # id and the four score fields, colon separated
        fields = (entry_id, scores['neg'], scores['neu'], scores['pos'], scores['compound'])
        return ":".join(str(field) for field in fields)

    # Flatten to answer id -> scores (later posts win on duplicate ids).
    flat = {}
    for per_post in lvls.values():
        flat.update(per_post)

    post_chunks = []
    for post_id, per_post in lvls.items():
        encoded_answers = ";".join(encode(aid, scores) for aid, scores in per_post.items())
        post_chunks.append(str(post_id) + ":" + encoded_answers)

    answer_chunks = [encode(aid, scores) for aid, scores in flat.items()]

    with open(filename, "w") as out:
        out.write("posts=" + ";;".join(post_chunks) + "\n")
        out.write("answers=" + ";".join(answer_chunks) + "\n")
def _parse_scored_entry(entry):
    """Split one ``<id>:<neg>:<neu>:<pos>:<compound>`` entry into ``(id, scores)``."""
    entry_id, neg, neu, pos, compound = entry.split(":")
    scores = {"neg": float(neg), "neu": float(neu), "pos": float(pos), "compound": float(compound)}
    return entry_id, scores


def readtoxleveltxt(filename):
    """Parse a text dump in the format written by ``dumptoxlevelstxt``.

    The file is expected to contain a ``posts=`` line (posts separated by
    ``;;``, each ``<post id>:<answer>;<answer>...``) and an ``answers=`` line
    (answers separated by ``;``), where every answer is encoded as
    ``<id>:<neg>:<neu>:<pos>:<compound>``.

    Empty payloads (``posts=`` / ``answers=`` with nothing after them, as
    produced for an empty data set) and posts without answers are parsed to
    empty dicts instead of raising.

    :param filename: path of the text dump to read.
    :returns: ``(posts, answers)`` where ``posts`` maps post id ->
        {answer id -> scores} and ``answers`` maps answer id -> scores.
        Ids are returned as strings (no numeric conversion is applied);
        scores are dicts with float ``neg``/``neu``/``pos``/``compound``.
        A mapping is empty if its line is missing from the file.
    :raises ValueError: if a non-empty answer entry does not consist of
        exactly five colon-separated fields or a score is not a float.
    """
    rq = {}
    ra = {}
    with open(filename, 'r') as f:
        for raw_line in f:
            line = raw_line.rstrip("\n")
            if line.startswith("posts="):
                payload = line[len("posts="):]
                rq = {}
                for post in payload.split(";;"):
                    # "".split(";;") yields [""]; skip empty chunks
                    if not post:
                        continue
                    # only the first ":" separates the post id from its answers
                    qid, _, answers = post.partition(":")
                    rq[qid] = dict(_parse_scored_entry(entry) for entry in answers.split(";") if entry)
            elif line.startswith("answers="):
                payload = line[len("answers="):]
                ra = dict(_parse_scored_entry(entry) for entry in payload.split(";") if entry)
    return rq, ra
if __name__ == "__main__":
    # Script entry point: the data set folder is the first CLI argument.
    usage = "{} <folder>".format(sys.argv[0])
    if len(sys.argv) < 2:
        # no folder given: show usage and fail
        print(usage)
        sys.exit(1)

    folder = sys.argv[1]
    if os.path.isdir(folder):
        main(folder)
    else:
        print("{} is not a folder".format(folder))
        sys.exit(1)