wip
This commit is contained in:
@@ -9,16 +9,22 @@ import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from scipy.stats import ks_2samp
|
||||
|
||||
from common import imprt, IMAGE_MAGICK
|
||||
from analyze_batch import readavgsentsingle
|
||||
from common import imprt, IMAGE_MAGICK, calc_intervals
|
||||
from loader import load
|
||||
|
||||
# Colour name per key; the keys look like sentiment categories
# (negative / neutral / positive / compound) -- TODO confirm at the use sites.
colors = dict(neg='red', neu='green', pos='blue', com='orange')
|
||||
|
||||
|
||||
def main(folder, intervl):
|
||||
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||
|
||||
outputdir = folder + "/output/ksbatch/"
|
||||
os.system("mkdir -p " + outputdir)
|
||||
srcfolder = folder + "/output/batch/"
|
||||
|
||||
g(srcfolder + "/averagesentiment.txt", outputdir, calc_intervals(posts, intervl))
|
||||
|
||||
onlyfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "newusers" in f and "i" + str(intervl) in f]
|
||||
onlyfiles = sorted(onlyfiles)
|
||||
|
||||
@@ -31,6 +37,47 @@ def main(folder, intervl):
|
||||
plotbydateold(onlyfiles, oldfiles, outputdir, intervl)
|
||||
|
||||
|
||||
class fake:
    """Stand-in object whose constructor accepts two arguments and keeps neither."""

    def __init__(self, p, s):
        """Accept ``p`` and ``s`` and discard them; no attributes are set."""
|
||||
|
||||
|
||||
def _plot_ks_series(intervals, single, field, outputfile):
    """Plot one attribute of the per-interval KS results and save the figure.

    intervals  -- sequence whose items are indexed with [0] to get the x value.
    single     -- list of series; each entry is either a float (NaN marker for
                  "no data") or a ks_2samp result object.
    field      -- attribute read from each result ("pvalue" or "statistic");
                  also used as the y-axis label.
    outputfile -- path handed to plt.savefig.
    """
    fig = plt.figure(figsize=(16, 12))
    for idx, series in enumerate(single):
        xs = [iv[0] for iv in intervals]
        # Float entries are the NaN placeholders; plot them as-is so the line
        # shows a gap instead of failing on a missing attribute.
        ys = [r if isinstance(r, float) else getattr(r, field) for r in series]
        plt.plot(xs, ys, label=str(idx + 1) + " posts - most posters")
    plt.title("KS 2-sided test for sentiments (X posts to 95%tile posters)")
    plt.xticks(rotation=90)
    plt.xlabel("Comparision: time frame X - time frame X+1")
    plt.ylabel(field)
    plt.legend(loc="upper right")
    plt.savefig(outputfile, bbox_inches='tight')
    plt.close(fig)


def g(srcfile, outputdir, intervals):
    """Run per-interval two-sample KS tests and plot p-values and statistics.

    Reads the data via readavgsentsingle(srcfile), then for every group
    i in 1..5 and every position j compares sample [0][j] with sample [i][j]
    using scipy's ks_2samp. Positions where either sample is empty are
    recorded as NaN. Two figures are written into outputdir:
    ks_averagesentiments_pval.png and ks_averagesentiments_stat.png.

    srcfile   -- path passed to readavgsentsingle.
    outputdir -- directory prefix for the two PNG files.
    intervals -- sequence providing the x values (item[0] of each entry);
                 presumably one entry per position j -- confirm with caller.

    NOTE(review): judging by the plot labels, index 0 appears to hold the
    "most posters" reference group and indices 1-5 the users with that many
    posts -- verify against readavgsentsingle.
    """
    print("ks global")
    avgss2 = readavgsentsingle(srcfile)

    reference = avgss2[0]
    single = []
    for i in range(1, 6):
        group = avgss2[i]
        series = []
        for j, ref_sample in enumerate(reference):
            cmp_sample = group[j]
            if len(cmp_sample) > 0 and len(ref_sample) > 0:
                series.append(ks_2samp(ref_sample, cmp_sample))
            else:
                # ks_2samp cannot run on an empty sample; mark the gap.
                series.append(float("nan"))
        single.append(series)

    _plot_ks_series(intervals, single, "pvalue",
                    outputdir + "/ks_averagesentiments_pval.png")
    _plot_ks_series(intervals, single, "statistic",
                    outputdir + "/ks_averagesentiments_stat.png")
|
||||
|
||||
|
||||
def plotbypost(onlyfiles, outputdir, intervl):
|
||||
print("plotbypost")
|
||||
files = defaultdict(list)
|
||||
|
||||
2
summary
2
summary
@@ -9,7 +9,7 @@ Data: The data sets are acquired from archive.org [https://archive.org/download/s
|
||||
- math.stackexchange.com (kaputt timeout)
|
||||
- mathoverflow.net
|
||||
- serverfault.com
|
||||
- stats.stackexchange.com (kaputt analyse_batch letzter plot, 42, 37 datapoints)
|
||||
- stats.stackexchange.com
|
||||
- stackoverflow.com (not yet)
|
||||
- superuser.com
|
||||
- tex.stackexchange.com
|
||||
|
||||
Reference in New Issue
Block a user