"""Kolmogorov-Smirnov comparison of consecutive toxicity-level dumps.

The script scans a folder for ``.py`` data files.  Each file is imported as a
module and must expose ``toxlevels``: a mapping whose values are iterables of
dicts carrying the keys ``neg``, ``neu``, ``pos`` and ``compound``.

File names are split on ``_`` (after dropping the ``.py`` suffix):

* field 2 is parsed as a ``%d-%m-%Y`` date,
* fields 2 and 3 together identify a date window,
* field 4 is an integer post count.

For every pair of neighbouring files in a group a two-sample KS test is run
per score type; results are written to ``ks_*.log`` files and plotted to
``ks_pval_*.png`` / ``ks_stat_*.png`` inside the same folder.
"""
import importlib
import importlib.util  # the submodule is not guaranteed to be loaded by "import importlib"
import os
import sys
from collections import defaultdict
from datetime import datetime
from os import listdir
from os.path import isfile, join

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import ks_2samp

colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}

# Plot/log label -> key read from each toxlevels entry (order = plot order).
_SCORE_KEYS = {"neg": "neg", "neu": "neu", "pos": "pos", "com": "compound"}

# (attribute of the ks_2samp result, short name used in labels and file names)
_METRICS = (("pvalue", "pval"), ("statistic", "stat"))

_DATE_FORMAT = "%d-%m-%Y"


def main(folder):
    """Run both analyses on every ``.py`` file found directly in *folder*.

    Args:
        folder: directory holding the data files; outputs are written there
            as well.
    """
    onlyfiles = sorted(
        name for name in listdir(folder)
        if name.endswith(".py") and isfile(join(folder, name))
    )

    plotbypost(onlyfiles, folder)
    plotbydate(onlyfiles, folder)


def plotbypost(onlyfiles, folder=None):
    """Compare files that share a post count, ordered by their start date.

    Args:
        onlyfiles: data file names (not paths) inside *folder*.
        folder: directory containing the files.  When omitted, the
            module-level ``folder`` set by the script entry point is used.

    Raises:
        NameError: if *folder* is omitted and no module-level ``folder``
            exists.
    """
    _report_groups(
        _resolve_folder(folder),
        _group_by_posts(onlyfiles),
        file_tag=str,
        title=lambda posts: "KS 2-sided test with max " + str(posts) + " posts",
        tick_label=lambda a, b: a.split("_")[2] + " -\n" + b.split("_")[2],
    )


def plotbydate(onlyfiles, folder=None):
    """Compare files that share a date window, ordered by post count.

    Args:
        onlyfiles: data file names (not paths) inside *folder*.
        folder: directory containing the files.  When omitted, the
            module-level ``folder`` set by the script entry point is used.

    Raises:
        NameError: if *folder* is omitted and no module-level ``folder``
            exists.
    """
    _report_groups(
        _resolve_folder(folder),
        _group_by_date(onlyfiles),
        file_tag=lambda window: window[0] + "_" + window[1],
        title=lambda window: ("KS 2-sided test with between "
                              + window[0] + " and " + window[1]),
        tick_label=lambda a, b: _fields(a)[4] + "-" + _fields(b)[4],
    )


def _resolve_folder(explicit):
    """Return *explicit*, or the legacy module-level ``folder`` when it is None."""
    if explicit is not None:
        return explicit
    try:
        return globals()["folder"]
    except KeyError:
        raise NameError(
            "name 'folder' is not defined; pass folder= explicitly"
        ) from None


def _fields(name):
    """Split a data file name into its ``_``-separated fields, minus ``.py``."""
    return name[:-3].split("_")


def _group_by_posts(onlyfiles):
    """Group names by integer post count; each group is sorted by start date.

    Groups with fewer than two files are dropped: there is no neighbouring
    pair to compare, so they would only yield empty logs and plots.
    """
    groups = defaultdict(list)
    for name in onlyfiles:
        groups[int(_fields(name)[4])].append(name)
    return {
        posts: sorted(
            names,
            key=lambda n: datetime.strptime(_fields(n)[2], _DATE_FORMAT),
        )
        for posts, names in groups.items()
        if len(names) >= 2
    }


def _group_by_date(onlyfiles):
    """Group names by (start, end) date fields; each group is sorted by post count.

    The post count is compared numerically so that e.g. 50 sorts before 100.
    Groups with fewer than two files are dropped (nothing to compare).
    """
    groups = defaultdict(list)
    for name in onlyfiles:
        parts = _fields(name)
        groups[(parts[2], parts[3])].append(name)
    return {
        window: sorted(names, key=lambda n: int(_fields(n)[4]))
        for window, names in groups.items()
        if len(names) >= 2
    }


def _load_scores(path):
    """Import the data file at *path* and return ``{label: [score, ...]}``."""
    entries = flatmap(imprt(path).toxlevels.values())
    return {
        label: [entry[key] for entry in entries]
        for label, key in _SCORE_KEYS.items()
    }


def _ks_changes(folder, names):
    """Run ks_2samp on each neighbouring pair of *names*, per score label.

    Returns ``{label: [result, ...]}`` with one result per pair.  *names*
    must hold at least one entry; every file is imported exactly once.
    """
    changes = {label: [] for label in _SCORE_KEYS}
    previous = _load_scores(join(folder, names[0]))
    for name in names[1:]:
        current = _load_scores(join(folder, name))
        for label in _SCORE_KEYS:
            changes[label].append(ks_2samp(previous[label], current[label]))
        previous = current
    return changes


def _write_log(path, names, changes):
    """Write one line per neighbouring pair with the KS result of every label."""
    with open(path, "w", encoding="utf-8") as log:
        for i, (first, second) in enumerate(zip(names, names[1:])):
            results = "; ".join(
                f"ks {label} = {changes[label][i]}" for label in _SCORE_KEYS
            )
            log.write(f"{first} -> {second}: {results}\n")


def _plot_metric(path, title, ticks, changes, attribute, short):
    """Plot one KS result attribute for all labels and save the figure to *path*.

    Per label the figure shows the series itself, its mean as a dashed line,
    and a marker on every point at least one standard deviation from the mean.
    """
    fig = plt.figure(figsize=(16, 12))
    for label, results in changes.items():
        colour = colors[label]
        values = [getattr(result, attribute) for result in results]
        plt.plot(ticks, values, label=label + "." + short, color=colour)

        mean = np.mean(values)
        std = np.std(values)
        outliers = [
            (tick, value) for tick, value in zip(ticks, values)
            if value <= mean - std or value >= mean + std
        ]
        plt.plot(ticks, [mean] * len(values), color=colour, ls='dashed')
        plt.plot([tick for tick, _ in outliers],
                 [value for _, value in outliers],
                 color=colour, ls='None', marker='o')
    plt.title(title)
    plt.xticks(rotation=90)
    plt.legend(loc="upper right")
    plt.savefig(path, bbox_inches='tight')
    plt.close(fig)


def _report_groups(folder, groups, file_tag, title, tick_label):
    """Compute, log and plot the KS changes of every group.

    Args:
        folder: directory holding the data files and receiving the outputs.
        groups: mapping of group key -> ordered list of file names.
        file_tag: callable turning a group key into the output file suffix.
        title: callable turning a group key into the plot title.
        tick_label: callable building an x tick label from two neighbouring
            file names.
    """
    for key, names in groups.items():
        print(key)
        changes = _ks_changes(folder, names)
        tag = file_tag(key)
        _write_log(join(folder, "ks_" + tag + ".log"), names, changes)

        ticks = [tick_label(a, b) for a, b in zip(names, names[1:])]
        for attribute, short in _METRICS:
            _plot_metric(
                join(folder, "ks_" + short + "_" + tag + ".png"),
                title(key), ticks, changes, attribute, short,
            )


def imprt(file):
    """Import the Python file at path *file* and return the module object.

    The module is not registered in ``sys.modules``.
    """
    # SECURITY: this executes the file as Python code.  Only point the script
    # at folders whose contents you trust.
    spec = importlib.util.spec_from_file_location("module.name", file)
    foo = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(foo)
    return foo


def flatmap(arr):
    """Flatten one nesting level: an iterable of iterables becomes a list."""
    return [item for sublist in arr for item in sublist]


def filecmp(file1, file2):
    """Three-way compare two file names by the date in their field 2.

    Returns -1, 0 or 1.  Identical names compare equal without being parsed.
    """
    if file1 == file2:
        return 0

    d1 = datetime.strptime(file1.split("_")[2], _DATE_FORMAT)
    d2 = datetime.strptime(file2.split("_")[2], _DATE_FORMAT)
    if d1 < d2:
        return -1
    if d1 > d2:
        return 1
    return 0


if __name__ == "__main__":
    # execute only if run as a script
    usage = sys.argv[0] + " <folder>"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Kept as a module-level name: plotbypost/plotbydate fall back to it when
    # called without an explicit folder.
    folder = sys.argv[1]
    if not os.path.isdir(folder):
        print(folder + " is not a folder")
        sys.exit(1)

    main(folder)