"""Compute VADER sentiment scores for every answer of every loaded post.

Run as a script with a data folder as the only argument. The posts are read
through ``loader.load``, each answer body is scored with
``SentimentIntensityAnalyzer.polarity_scores``, and the results are written
to ``<folder>/output/sentiments.py`` as two Python dict literals.
"""
import os
import sys

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from loader import load, dmt


def printnoln(text):
    """Print *text* without a trailing newline and flush stdout immediately."""
    print(text, end='', flush=True)


def rprint(text):
    """Print *text* preceded by a carriage return (overwrites the current line)."""
    print('\r' + text)


# NOTE(review): the three names below are not referenced anywhere in this
# script; they are kept in case other modules import them from here.
DAYS_NEW_USER = 7
OLD_USER_YEAR = 3
colors = ['red', 'green', 'blue', 'orange', 'deeppink']

# One shared analyser instance, reused for every answer.
analyser = SentimentIntensityAnalyzer()


def _score_answers(post):
    """Return ``(post id, {answer id: sentiment scores})`` for one post."""
    scores = {answer['Id']: computeToxLevel(answer['Body'])
              for answer in post['Answers']}
    return post['Id'], scores


def main(folder):
    """Score all answers found in *folder* and dump the results.

    Args:
        folder: Directory handed to ``loader.load``; the output is written to
            an ``output`` sub-directory of it, which is created if missing.
    """
    # Only the posts are used here; the other values returned by load() are
    # unpacked (to keep the same arity check) but ignored.
    _users, posts, _firstcontrib, _sumcontrib = load(folder)

    # Create the directory through the OS API instead of shelling out to
    # ``mkdir -p``: no shell quoting problems with unusual folder names, and
    # a failure raises instead of being silently ignored.
    outfolder = os.path.join(folder, "output")
    os.makedirs(outfolder, exist_ok=True)
    outfilename = os.path.join(outfolder, "sentiments")

    # dmt is a project helper from ``loader``; presumably it distributes the
    # map over several workers (the meaning of ``10`` is not visible here --
    # TODO confirm) and uses the label for progress output.
    results = dmt(posts, 10).map(_score_answers, "calculating sentiments").getresults()
    toxlevels = {post_id: scores for (post_id, scores) in results}

    dumptoxlevels(toxlevels, outfilename + ".py")


def computeToxLevel(text):
    """Return the analyser's ``polarity_scores`` result for *text*."""
    return analyser.polarity_scores(text)


def dumptoxlevels(lvls, filename):
    """Write the per-post and flattened per-answer scores to *filename*.

    Args:
        lvls: Mapping ``post id -> {answer id -> scores}``.
        filename: Path of the file to (over)write.

    The file receives two lines, ``posts = <lvls>`` and ``answers = <flat>``,
    where ``<flat>`` maps every answer id directly to its scores. If the same
    answer id appears under several posts, the last one seen wins.
    """
    answers = {}
    for post_scores in lvls.values():
        answers.update(post_scores)

    # The output is Python source, which Python reads as UTF-8 by default, so
    # write it as UTF-8 regardless of the platform's locale encoding.
    with open(filename, "w", encoding="utf-8") as file:
        file.write("posts = " + str(lvls) + "\n")
        file.write("answers = " + str(answers) + "\n")


if __name__ == "__main__":
    # execute only if run as a script
    usage = sys.argv[0] + " <folder>"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    folder = sys.argv[1]
    if not os.path.isdir(folder):
        print(folder + " is not a folder")
        sys.exit(1)

    main(folder)