This commit is contained in:
wea_ondara
2020-01-23 13:04:33 +01:00
parent 6f754c0f53
commit 8877747692
2 changed files with 49 additions and 2 deletions

View File

@@ -9,16 +9,22 @@ import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import ks_2samp
from common import imprt, IMAGE_MAGICK
from analyze_batch import readavgsentsingle
from common import imprt, IMAGE_MAGICK, calc_intervals
from loader import load
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
# Entry point of the KS batch analysis (visible part only).
# Loads the dataset under `folder`, prepares `<folder>/output/ksbatch/` as the
# output directory, renders the global KS plots through g(), and gathers the
# sorted list of "newusers" batch files for interval size `intervl`.
# NOTE(review): the function continues beyond these lines (the diff hunk ends
# here), so the remaining steps are intentionally not described.
def main(folder, intervl):
# Only `posts` is used in the visible part of this function.
users, posts, firstcontrib, sumcontrib = load(folder)
outputdir = folder + "/output/ksbatch/"
# NOTE(review): the directory is created through a shell command built by
# string concatenation; a folder containing spaces or shell metacharacters
# would break it. os.makedirs(outputdir, exist_ok=True) avoids the shell.
os.system("mkdir -p " + outputdir)
srcfolder = folder + "/output/batch/"
# Global KS plots, fed by the averagesentiment.txt file in the batch folder and
# the intervals that calc_intervals derives from `posts` and `intervl`.
g(srcfolder + "/averagesentiment.txt", outputdir, calc_intervals(posts, intervl))
# Keep regular files in srcfolder whose name ends in ".py" and contains both
# "newusers" and "i<intervl>".
# NOTE(review): the "i<intervl>" check is a plain substring match, so e.g.
# intervl=1 would also match names containing "i10" - confirm this is intended.
onlyfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "newusers" in f and "i" + str(intervl) in f]
onlyfiles = sorted(onlyfiles)
@@ -31,6 +37,47 @@ def main(folder, intervl):
plotbydateold(onlyfiles, oldfiles, outputdir, intervl)
class fake:
    """Placeholder type: constructed from two values, stores neither."""

    def __init__(self, p, s):
        """Accept ``p`` and ``s`` and discard them; no attributes are set."""
        return None
def _ks_or_nan(sample_a, sample_b):
    """Return ``ks_2samp(sample_a, sample_b)``, or NaN if either sample is empty."""
    if len(sample_b) > 0 and len(sample_a):
        return ks_2samp(sample_a, sample_b)
    return float("nan")


def _plot_ks_series(single, intervals, attr, outfile):
    """Plot one KS result attribute (``pvalue`` or ``statistic``) per series and save it."""
    # The x-values are identical for every series, so build them once.
    xs = [iv[0] for iv in intervals]
    fig = plt.figure(figsize=(16, 12))
    for idx, series in enumerate(single):
        # Entries are either the float NaN placeholder or a KS result object.
        ys = [r if isinstance(r, float) else getattr(r, attr) for r in series]
        plt.plot(xs, ys, label=str(idx + 1) + " posts - most posters")
    plt.title("KS 2-sided test for sentiments (X posts to 95%tile posters)")
    plt.xticks(rotation=90)
    plt.xlabel("Comparision: time frame X - time frame X+1")
    plt.ylabel(attr)
    plt.legend(loc="upper right")
    plt.savefig(outfile, bbox_inches='tight')
    plt.close(fig)


def g(srcfile, outputdir, intervals):
    """Plot per-interval two-sample KS results between sentiment groups.

    Reads the grouped samples with ``readavgsentsingle(srcfile)`` and, for each
    of the groups 1..5, runs ``ks_2samp`` against group 0 interval by interval.
    An interval where either sample is empty yields NaN instead of a test
    result. Two figures are written into ``outputdir``:
    ``ks_averagesentiments_pval.png`` (p-values) and
    ``ks_averagesentiments_stat.png`` (KS statistics).

    Args:
        srcfile: Path handed to ``readavgsentsingle``. The result is indexed as
            ``[group][interval]`` and must provide at least six groups.
            Judging by the plot labels, group 0 appears to be the "most
            posters" baseline and group i the users with i posts - confirm
            against ``readavgsentsingle``.
        outputdir: Directory prefix for the two PNG files.
        intervals: Sequence whose items expose the x-axis value at index 0;
            its length has to match the number of intervals per group.

    Returns:
        None. The figures are saved as a side effect.
    """
    print("ks global")
    avgss2 = readavgsentsingle(srcfile)
    baseline = avgss2[0]

    # The former "kscom" test over per-interval means was never read and could
    # fail before any plot was produced, so it is no longer computed.
    single = []
    for i in range(1, 6):
        group = avgss2[i]
        single.append([_ks_or_nan(baseline[j], group[j]) for j in range(len(baseline))])

    _plot_ks_series(single, intervals, "pvalue", outputdir + "/ks_averagesentiments_pval.png")
    _plot_ks_series(single, intervals, "statistic", outputdir + "/ks_averagesentiments_stat.png")
def plotbypost(onlyfiles, outputdir, intervl):
print("plotbypost")
files = defaultdict(list)