"""Compute VADER sentiment scores for every answer of every post and dump them.

Two files are written to ``<folder>/output/``:

* ``sentiments.py``  -- Python-literal dump (``posts = {...}`` / ``answers = {...}``)
* ``sentiments.txt`` -- compact text format that ``readtoxleveltxt`` parses back
"""

import os
import sys

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from loader import load, dmp

analyser = SentimentIntensityAnalyzer()

# Score fields, in the order they are serialised in the text format.
_SCORE_KEYS = ("neg", "neu", "pos", "compound")


def main(folder):
    """Score all answers found under *folder* and write both dump files.

    ``load(folder)`` returns four values; only the posts are used here. Each
    post is read through its ``'Id'`` and ``'Answers'`` keys, and each answer
    through its ``'Id'`` and ``'Body'`` keys.
    """
    _users, posts, _firstcontrib, _sumcontrib = load(folder)

    outfolder = folder + "/output/"
    # Create the directory through the os module instead of shelling out to
    # "mkdir -p": a folder name with spaces or shell metacharacters would
    # break (or be interpreted by) the shell, and failures went unnoticed.
    os.makedirs(outfolder, exist_ok=True)

    outfilename = outfolder + "sentiments"

    # compute toxic levels
    # NOTE(review): dmp(...) comes from loader; presumably a batched/parallel
    # map helper (batch size 100, progress label) -- confirm there.
    scored = dmp(posts, 100).map(
        lambda p: (p['Id'], {a['Id']: computeToxLevel(a['Body']) for a in p['Answers']}),
        "calculating sentiments",
    ).getresults()
    toxlevels = dict(scored)

    dumptoxlevels(toxlevels, outfilename + ".py")
    dumptoxlevelstxt(toxlevels, outfilename + ".txt")


def computeToxLevel(text):
    """Return the analyser's polarity scores for *text*."""
    return analyser.polarity_scores(text)


def _flatten_answers(lvls):
    """Merge the per-post ``{answer_id: scores}`` dicts into one flat dict."""
    answers = {}
    for per_post in lvls.values():
        answers.update(per_post)
    return answers


def _format_scores(ident, scores):
    """Serialise one scored item as ``id:neg:neu:pos:compound``."""
    return ":".join([str(ident)] + [str(scores[key]) for key in _SCORE_KEYS])


def _parse_scores(entry):
    """Parse ``id:neg:neu:pos:compound`` into ``(id, scores_dict)``.

    Raises ValueError when the entry does not have exactly five fields or a
    score is not a valid float.
    """
    ident, neg, neu, pos, compound = entry.split(":")
    values = (neg, neu, pos, compound)
    return ident, {key: float(value) for key, value in zip(_SCORE_KEYS, values)}


def dumptoxlevels(lvls, filename):
    """Write *lvls* to *filename* as two Python assignments.

    ``posts`` is *lvls* itself (``{post_id: {answer_id: scores}}``) and
    ``answers`` is the same data flattened to ``{answer_id: scores}``.
    """
    answers = _flatten_answers(lvls)
    with open(filename, "w") as out:
        out.write("posts = " + str(lvls) + "\n")
        out.write("answers = " + str(answers) + "\n")


def dumptoxlevelstxt(lvls, filename):
    """Write *lvls* to *filename* in the compact text format.

    Layout::

        posts=<pid>:<aid>:<neg>:<neu>:<pos>:<compound>;<aid>:...;;<pid>:...
        answers=<aid>:<neg>:<neu>:<pos>:<compound>;<aid>:...

    Posts are separated by ``;;``, answers by ``;``. A post without answers
    is written as ``<pid>:``.
    """
    answers = _flatten_answers(lvls)

    post_entries = [
        str(pid) + ":" + ";".join(_format_scores(aid, scores) for aid, scores in per_post.items())
        for pid, per_post in lvls.items()
    ]
    answer_entries = [_format_scores(aid, scores) for aid, scores in answers.items()]

    with open(filename, "w") as out:
        out.write("posts=" + ";;".join(post_entries) + "\n")
        out.write("answers=" + ";".join(answer_entries) + "\n")


def readtoxleveltxt(filename):
    """Read a file written by ``dumptoxlevelstxt``.

    Returns ``(posts, answers)`` where ``posts`` is
    ``{post_id: {answer_id: scores}}`` and ``answers`` is
    ``{answer_id: scores}``. All ids are returned as strings (no numeric
    conversion is attempted); scores are floats.

    An empty ``posts=`` or ``answers=`` payload -- which is what the writer
    produces for empty input -- yields an empty dict instead of failing.
    Malformed entries raise ValueError.
    """
    rq = {}
    ra = {}
    with open(filename, 'r') as f:
        for raw in f:
            line = raw.rstrip("\n")
            if line.startswith("posts="):
                rq = _parse_posts(line[len("posts="):])
            elif line.startswith("answers="):
                ra = _parse_answers(line[len("answers="):])
    return rq, ra


def _parse_posts(payload):
    """Parse the payload of a ``posts=`` line into nested dicts."""
    posts = {}
    if not payload:
        return posts
    for entry in payload.split(";;"):
        # Only the first ':' separates the post id from its answers.
        qid, answers_part = entry.split(":", 1)
        posts[qid] = _parse_answers(answers_part)
    return posts


def _parse_answers(payload):
    """Parse a ``;``-separated list of scored answers into a dict."""
    if not payload:
        return {}
    return dict(_parse_scores(entry) for entry in payload.split(";"))


if __name__ == "__main__":
    # execute only if run as a script
    # NOTE(review): the usage text looks truncated (possibly a lost
    # "<folder>" placeholder) -- kept as-is, confirm intended wording.
    usage = sys.argv[0] + " "
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)
    folder = sys.argv[1]
    if not os.path.isdir(folder):
        print(folder + " is not a folder")
        sys.exit(1)
    main(folder)