wip
This commit is contained in:
@@ -9,16 +9,22 @@ import matplotlib.pyplot as plt
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.stats import ks_2samp
|
from scipy.stats import ks_2samp
|
||||||
|
|
||||||
from common import imprt, IMAGE_MAGICK
|
from analyze_batch import readavgsentsingle
|
||||||
|
from common import imprt, IMAGE_MAGICK, calc_intervals
|
||||||
|
from loader import load
|
||||||
|
|
||||||
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
|
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
|
||||||
|
|
||||||
|
|
||||||
def main(folder, intervl):
|
def main(folder, intervl):
|
||||||
|
users, posts, firstcontrib, sumcontrib = load(folder)
|
||||||
|
|
||||||
outputdir = folder + "/output/ksbatch/"
|
outputdir = folder + "/output/ksbatch/"
|
||||||
os.system("mkdir -p " + outputdir)
|
os.system("mkdir -p " + outputdir)
|
||||||
srcfolder = folder + "/output/batch/"
|
srcfolder = folder + "/output/batch/"
|
||||||
|
|
||||||
|
g(srcfolder + "/averagesentiment.txt", outputdir, calc_intervals(posts, intervl))
|
||||||
|
|
||||||
onlyfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "newusers" in f and "i" + str(intervl) in f]
|
onlyfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "newusers" in f and "i" + str(intervl) in f]
|
||||||
onlyfiles = sorted(onlyfiles)
|
onlyfiles = sorted(onlyfiles)
|
||||||
|
|
||||||
@@ -31,6 +37,47 @@ def main(folder, intervl):
|
|||||||
plotbydateold(onlyfiles, oldfiles, outputdir, intervl)
|
plotbydateold(onlyfiles, oldfiles, outputdir, intervl)
|
||||||
|
|
||||||
|
|
||||||
|
class fake:
    """Placeholder object whose constructor takes two values and keeps neither.

    NOTE(review): the parameter names suggest a stand-in for a KS test result
    (p-value, statistic), but nothing is stored on the instance -- confirm the
    intended use before relying on any attributes.
    """

    def __init__(self, p, s):
        """Accept ``p`` and ``s`` and discard them; no attributes are set."""
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_ks_series(single, intervals, attr, outfile):
    """Plot one attribute of the per-interval KS results and save the figure.

    Args:
        single: list of series; each series holds, per interval, either a
            ``ks_2samp`` result or a float ``nan`` placeholder.
        intervals: sequence whose items are indexed with ``[0]`` to obtain
            the x-axis value of each interval.
        attr: name of the result attribute to plot (``"pvalue"`` or
            ``"statistic"``); it is also used as the y-axis label.
        outfile: path of the image file to write.
    """
    fig = plt.figure(figsize=(16, 12))
    xs = [iv[0] for iv in intervals]
    for idx, series in enumerate(single):
        # NaN placeholders are plain floats and are plotted as-is (a gap);
        # real results contribute the requested attribute.
        ys = [r if isinstance(r, float) else getattr(r, attr) for r in series]
        plt.plot(xs, ys, label=str(idx + 1) + " posts - most posters")
    plt.title("KS 2-sided test for sentiments (X posts to 95%tile posters)")
    plt.xticks(rotation=90)
    plt.xlabel("Comparision: time frame X - time frame X+1")
    plt.ylabel(attr)
    plt.legend(loc="upper right")
    plt.savefig(outfile, bbox_inches='tight')
    plt.close(fig)


def g(srcfile, outputdir, intervals):
    """Run per-interval two-sample KS tests and plot p-values and statistics.

    Reads the data via ``readavgsentsingle(srcfile)`` and, for each of the
    groups at indices 1..5, compares every interval's sample against the
    sample at the same interval in group 0 using ``ks_2samp``. Two figures
    are written to ``outputdir``: ``ks_averagesentiments_pval.png`` and
    ``ks_averagesentiments_stat.png``.

    NOTE(review): assumes ``readavgsentsingle`` returns at least six groups,
    each a sequence of sized samples with one entry per interval, and that
    group 0 is the reference ("most posters" per the plot labels) -- confirm
    against ``analyze_batch``.

    Args:
        srcfile: path passed to ``readavgsentsingle``.
        outputdir: directory the two PNG files are written into.
        intervals: sequence of intervals; ``interval[0]`` is the x value.
    """
    print("ks global")
    avgss2 = readavgsentsingle(srcfile)
    reference = avgss2[0]

    single = []
    for i in range(1, 6):
        group = avgss2[i]
        # Index the group by the reference's positions so a shorter group
        # fails loudly instead of being silently truncated.
        single.append([
            ks_2samp(ref, group[j])
            if len(group[j]) > 0 and len(ref) > 0
            else float("nan")  # the KS test needs two non-empty samples
            for j, ref in enumerate(reference)
        ])

    _plot_ks_series(single, intervals, "pvalue",
                    outputdir + "/ks_averagesentiments_pval.png")
    _plot_ks_series(single, intervals, "statistic",
                    outputdir + "/ks_averagesentiments_stat.png")
|
||||||
|
|
||||||
|
|
||||||
def plotbypost(onlyfiles, outputdir, intervl):
|
def plotbypost(onlyfiles, outputdir, intervl):
|
||||||
print("plotbypost")
|
print("plotbypost")
|
||||||
files = defaultdict(list)
|
files = defaultdict(list)
|
||||||
|
|||||||
2
summary
2
summary
@@ -9,7 +9,7 @@ Data: The data sets are acquired from archive.org [https://archive.org/download/s
|
|||||||
- math.stackexchange.com (kaputt timeout)
|
- math.stackexchange.com (kaputt timeout)
|
||||||
- mathoverflow.net
|
- mathoverflow.net
|
||||||
- serverfault.com
|
- serverfault.com
|
||||||
- stats.stackexchange.com (kaputt analyse_batch letzter plot, 42, 37 datapoints)
|
- stats.stackexchange.com
|
||||||
- stackoverflow.com (not yet)
|
- stackoverflow.com (not yet)
|
||||||
- superuser.com
|
- superuser.com
|
||||||
- tex.stackexchange.com
|
- tex.stackexchange.com
|
||||||
|
|||||||
Reference in New Issue
Block a user