wip
This commit is contained in:
188
calctoxdiff.py
188
calctoxdiff.py
@@ -1,4 +1,3 @@
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
@@ -10,27 +9,36 @@ import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from scipy.stats import ks_2samp
|
||||
|
||||
from common import imprt
|
||||
|
||||
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
|
||||
IMAGE_MAGICK = "magick"
|
||||
|
||||
|
||||
def main(folder):
    """Compare new-user batches against old-user batches found under ``folder``.

    Batch modules are read from ``<folder>/output/batch/`` and all results are
    written to ``<folder>/output/ksbatch/``. Only regular ``.py`` files are
    considered; files whose name contains ``"newusers"`` form the samples and
    files whose name contains ``"oldusers"`` form the baselines.

    :param folder: base directory that contains the ``output/batch`` tree.
    """
    outputdir = folder + "/output/ksbatch/"
    # makedirs instead of shelling out to ``mkdir -p``: portable, and safe for
    # paths that contain spaces or shell metacharacters.
    os.makedirs(outputdir, exist_ok=True)
    srcfolder = folder + "/output/batch/"

    # List the directory once and split the result by user group.
    batchnames = [f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py")]
    onlyfiles = sorted(srcfolder + f for f in batchnames if "newusers" in f)
    oldfiles = sorted(srcfolder + f for f in batchnames if "oldusers" in f)

    # NOTE: the plotbypost / plotbydate passes are disabled in this revision;
    # only the new-vs-old comparison is produced.
    plotbydateold(onlyfiles, oldfiles, outputdir)
|
||||
|
||||
|
||||
def plotbypost(onlyfiles, outputdir):
|
||||
print("plotbypost")
|
||||
files = defaultdict(list)
|
||||
for f in onlyfiles:
|
||||
s = f[:-3].split("_")
|
||||
files[int(s[4])].append(f)
|
||||
files = {p: sorted(l, key=lambda e: datetime.strptime(e.split("_")[2], "%d-%m-%Y")) for (p, l) in files.items()}
|
||||
files[int(s[5])].append(f)
|
||||
files = {p: sorted(l, key=lambda e: datetime.strptime(e.split("_")[3], "%d-%m-%Y")) for (p, l) in files.items()}
|
||||
|
||||
changes_neg = defaultdict(list)
|
||||
changes_neu = defaultdict(list)
|
||||
@@ -45,15 +53,15 @@ def plotbypost(onlyfiles, outputdir):
|
||||
tox1 = imprt(l[i]).toxlevels
|
||||
tox2 = imprt(l[i + 1]).toxlevels
|
||||
|
||||
neglevelsflat1 = [item['neg'] for item in flatmap(tox1.values())]
|
||||
neulevelsflat1 = [item['neu'] for item in flatmap(tox1.values())]
|
||||
poslevelsflat1 = [item['pos'] for item in flatmap(tox1.values())]
|
||||
comlevelsflat1 = [item['compound'] for item in flatmap(tox1.values())]
|
||||
neglevelsflat1 = [item['neg'] for item in tox1]
|
||||
neulevelsflat1 = [item['neu'] for item in tox1]
|
||||
poslevelsflat1 = [item['pos'] for item in tox1]
|
||||
comlevelsflat1 = [item['compound'] for item in tox1]
|
||||
|
||||
neglevelsflat2 = [item['neg'] for item in flatmap(tox2.values())]
|
||||
neulevelsflat2 = [item['neu'] for item in flatmap(tox2.values())]
|
||||
poslevelsflat2 = [item['pos'] for item in flatmap(tox2.values())]
|
||||
comlevelsflat2 = [item['compound'] for item in flatmap(tox2.values())]
|
||||
neglevelsflat2 = [item['neg'] for item in tox2]
|
||||
neulevelsflat2 = [item['neu'] for item in tox2]
|
||||
poslevelsflat2 = [item['pos'] for item in tox2]
|
||||
comlevelsflat2 = [item['compound'] for item in tox2]
|
||||
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsflat2)
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsflat2)
|
||||
@@ -74,7 +82,7 @@ def plotbypost(onlyfiles, outputdir):
|
||||
+ "; ks pos = " + str(changes_pos[p][i]) + "; ks com = " + str(changes_com[p][i]) + "\n")
|
||||
# pval
|
||||
for (p, l) in files.items():
|
||||
x = [l[i].split("_")[2] + " -\n" + l[i + 1].split("_")[2] for i in range(len(l) - 1)]
|
||||
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[p], "neu": changes_neu[p], "pos": changes_pos[p], "com": changes_com[p]}.items():
|
||||
pval = [x.pvalue for x in changes]
|
||||
@@ -92,7 +100,7 @@ def plotbypost(onlyfiles, outputdir):
|
||||
|
||||
# stat
|
||||
for (p, l) in files.items():
|
||||
x = [l[i].split("_")[2] + " -\n" + l[i + 1].split("_")[2] for i in range(len(l) - 1)]
|
||||
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[p], "neu": changes_neu[p], "pos": changes_pos[p], "com": changes_com[p]}.items():
|
||||
stat = [x.statistic for x in changes]
|
||||
@@ -110,11 +118,12 @@ def plotbypost(onlyfiles, outputdir):
|
||||
|
||||
|
||||
def plotbydate(onlyfiles, outputdir):
|
||||
print("plotbydate")
|
||||
files = defaultdict(list)
|
||||
for f in onlyfiles:
|
||||
s = f[:-3].split("_")
|
||||
files[(s[2], s[3])].append(f)
|
||||
files = {d: sorted(l, key=lambda e: e.split("_")[4]) for (d, l) in files.items()}
|
||||
files[(s[3], s[4])].append(f)
|
||||
files = {d: sorted(l, key=lambda e: e.split("_")[5]) for (d, l) in files.items()}
|
||||
|
||||
changes_neg = defaultdict(list)
|
||||
changes_neu = defaultdict(list)
|
||||
@@ -129,15 +138,15 @@ def plotbydate(onlyfiles, outputdir):
|
||||
tox1 = imprt(l[i]).toxlevels
|
||||
tox2 = imprt(l[i + 1]).toxlevels
|
||||
|
||||
neglevelsflat1 = [item['neg'] for item in flatmap(tox1.values())]
|
||||
neulevelsflat1 = [item['neu'] for item in flatmap(tox1.values())]
|
||||
poslevelsflat1 = [item['pos'] for item in flatmap(tox1.values())]
|
||||
comlevelsflat1 = [item['compound'] for item in flatmap(tox1.values())]
|
||||
neglevelsflat1 = [item['neg'] for item in tox1]
|
||||
neulevelsflat1 = [item['neu'] for item in tox1]
|
||||
poslevelsflat1 = [item['pos'] for item in tox1]
|
||||
comlevelsflat1 = [item['compound'] for item in tox1]
|
||||
|
||||
neglevelsflat2 = [item['neg'] for item in flatmap(tox2.values())]
|
||||
neulevelsflat2 = [item['neu'] for item in flatmap(tox2.values())]
|
||||
poslevelsflat2 = [item['pos'] for item in flatmap(tox2.values())]
|
||||
comlevelsflat2 = [item['compound'] for item in flatmap(tox2.values())]
|
||||
neglevelsflat2 = [item['neg'] for item in tox2]
|
||||
neulevelsflat2 = [item['neu'] for item in tox2]
|
||||
poslevelsflat2 = [item['pos'] for item in tox2]
|
||||
comlevelsflat2 = [item['compound'] for item in tox2]
|
||||
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsflat2)
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsflat2)
|
||||
@@ -159,7 +168,7 @@ def plotbydate(onlyfiles, outputdir):
|
||||
|
||||
# pval
|
||||
for (d, l) in files.items():
|
||||
x = [l[i].split("_")[4][:-3] + "-" + l[i + 1].split("_")[4][:-3] for i in range(len(l) - 1)]
|
||||
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
|
||||
pval = [x.pvalue for x in changes]
|
||||
@@ -177,7 +186,7 @@ def plotbydate(onlyfiles, outputdir):
|
||||
|
||||
# stat
|
||||
for (d, l) in files.items():
|
||||
x = [l[i].split("_")[4][:-3] + "-" + l[i + 1].split("_")[4][:-3] for i in range(len(l) - 1)]
|
||||
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
|
||||
stat = [x.statistic for x in changes]
|
||||
@@ -194,15 +203,122 @@ def plotbydate(onlyfiles, outputdir):
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
def imprt(file):
    """Load the Python source file at ``file`` and return it as a module object.

    The module is executed on load, so any top-level code in ``file`` runs.
    Only pass files you trust.

    :param file: path of the ``.py`` file to load.
    :return: the executed module; its globals are available as attributes.
    :raises ImportError: if no import spec/loader can be built for ``file``
        (for example when the suffix is not a recognised Python source suffix).
    """
    # A bare ``import importlib`` does not guarantee that the ``util``
    # submodule is loaded, so import it explicitly where it is used.
    import importlib.util

    spec = importlib.util.spec_from_file_location("module.name", file)
    if spec is None or spec.loader is None:
        raise ImportError("cannot build an import spec for " + str(file))
    foo = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(foo)
    return foo
|
||||
def _plot_olddate_metric(files, changes, metric, attr, outputdir):
    """Plot one KS metric per date range and bundle the figures into one PDF.

    :param files: dict mapping a ``(start, end)`` pair to the ordered list of
        new-user batch file paths for that range.
    :param changes: dict mapping a score kind (``"neg"``, ``"neu"``, ``"pos"``,
        ``"com"``) to a dict of per-range KS result lists; an entry is ``None``
        where no test was run.
    :param metric: short tag used in legend labels and file names
        (``"pval"`` or ``"stat"``).
    :param attr: attribute read from every KS result (``"pvalue"`` or
        ``"statistic"``).
    :param outputdir: directory that receives the PNG files and the PDF.
    """
    import shlex
    import subprocess

    images = []
    for (d, l) in files.items():
        print(d)
        # x labels: sixth "_"-separated field of each path, ".py" stripped.
        x = [path[:-3].split("_")[5] for path in l]
        fig = plt.figure(figsize=(16, 12))
        for kind, per_date in changes.items():
            points = [(xx, getattr(c, attr)) for xx, c in zip(x, per_date[d]) if c is not None]
            plt.plot([p[0] for p in points], [p[1] for p in points], label=kind + "." + metric, color=colors[kind])
            if not points:
                continue
            values = [p[1] for p in points]
            mean = np.mean(values)
            std = np.std(values)
            # Mark every point lying at least one standard deviation from the mean.
            dev = [(xx, v) for (xx, v) in points if v <= mean - std or v >= mean + std]
            plt.plot([p[0] for p in points], [mean] * len(points), color=colors[kind], ls='dashed')
            plt.plot([p[0] for p in dev], [p[1] for p in dev], color=colors[kind], ls='None', marker='o')
        plt.title("KS 2-sided test with new and old users between " + d[0] + " and " + d[1])
        plt.xticks(rotation=90)
        plt.legend(loc="upper right")
        outfile = outputdir + "/ks_olddate_" + metric + "_" + d[0] + "_" + d[1] + ".png"
        plt.savefig(outfile, bbox_inches='tight')
        plt.close(fig)
        images.append(outfile)

    if not images:
        # Nothing was rendered, so there is nothing for ImageMagick to merge.
        return
    pdf = outputdir + "/ks_olddate_" + metric + ".pdf"
    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters are handed over unchanged.
    try:
        subprocess.run(shlex.split(IMAGE_MAGICK) + images + [pdf], check=False)
    except OSError as err:
        # Keep the PDF step best-effort: the PNG files are already on disk.
        print("could not run " + IMAGE_MAGICK + ": " + str(err))


def plotbydateold(onlyfiles, oldfiles, outputdir):
    """Run KS two-sample tests of every new-user batch against its old-user baseline.

    Paths are split on ``"_"`` (after dropping the ``.py`` suffix); fields 3 and
    4 form the date-range key and field 5 orders and labels the batches inside
    one range. The split is applied to the whole path string, so underscores in
    directory names shift these indices.

    For each date range the function writes ``ks_olddate_<start>_<end>.log``
    plus one p-value and one statistic PNG, and finally asks ImageMagick to
    merge the PNGs into ``ks_olddate_pval.pdf`` and ``ks_olddate_stat.pdf``.

    :param onlyfiles: paths of the new-user batch modules.
    :param oldfiles: paths of the old-user batch modules, one per date range.
    :param outputdir: directory that receives logs, PNGs and PDFs.
    :raises KeyError: if a date range present in ``onlyfiles`` has no matching
        entry in ``oldfiles``.
    """
    print("plotbydateold")
    # (kind used in plots/logs, key inside each toxlevels item)
    score_keys = (("neg", "neg"), ("neu", "neu"), ("pos", "pos"), ("com", "compound"))

    files = defaultdict(list)
    for f in onlyfiles:
        s = f[:-3].split("_")
        files[(s[3], s[4])].append(f)
    # Start dates are reversed field-wise (presumably dd-mm-YYYY -> YYYY-mm-dd,
    # TODO confirm) so that plain string comparison sorts them.
    dates = sorted(files.keys(), key=lambda e: "-".join(reversed(e[0].split("-"))))
    # NOTE(review): field 5 is compared as a string; if it is an unpadded
    # number the order is lexicographic ("10" before "2") - confirm.
    files = {d: sorted(files[d], key=lambda e: e.split("_")[5]) for d in dates}

    baselines = {}
    for f in oldfiles:
        s = f[:-3].split("_")
        baselines[(s[3], s[4])] = f

    changes = {kind: defaultdict(list) for kind, _ in score_keys}

    for (d, l) in files.items():
        print(d)
        toxold = imprt(baselines[d]).toxlevels
        old_levels = {kind: [item[key] for item in toxold] for kind, key in score_keys}

        for path in l:
            tox1 = imprt(path).toxlevels
            if len(tox1) == 0 or len(toxold) == 0:
                # No test is run on an empty sample; keep a placeholder so the
                # result lists stay aligned with ``l``.
                for kind, _ in score_keys:
                    changes[kind][d].append(None)
                continue

            for kind, key in score_keys:
                new_levels = [item[key] for item in tox1]
                changes[kind][d].append(ks_2samp(new_levels, old_levels[kind]))

    print("logs")
    for (d, l) in files.items():
        f1 = baselines[d]
        with open(outputdir + "/ks_olddate_" + d[0] + "_" + d[1] + ".log", "w") as f:
            for i, f2 in enumerate(l):
                if changes["neg"][d][i] is None:
                    continue
                f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes["neg"][d][i]) + "; ks neu = " + str(changes["neu"][d][i])
                        + "; ks pos = " + str(changes["pos"][d][i]) + "; ks com = " + str(changes["com"][d][i]) + "\n")

    # pval
    print("pval")
    _plot_olddate_metric(files, changes, "pval", "pvalue", outputdir)

    # stat
    print("stat")
    _plot_olddate_metric(files, changes, "stat", "statistic", outputdir)
|
||||
|
||||
|
||||
def filecmp(file1, file2):
|
||||
|
||||
Reference in New Issue
Block a user