wip
This commit is contained in:
@@ -54,8 +54,21 @@ def main(folder, intervl):
|
||||
gpos = []
|
||||
gcom = []
|
||||
|
||||
goutfilenamenewusers = outputdir + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||
goutfilenameoldusers = outputdir + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y")
|
||||
goutfilenamenewusers = outputdir + "batch_newusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") + "_i" + str(intervl)
|
||||
goutfilenameoldusers = outputdir + "batch_oldusers_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") + "_i" + str(intervl)
|
||||
|
||||
start = cms()
|
||||
printnoln("sorting posts ...")
|
||||
sortedposts = defaultdict(list)
|
||||
for (i, post) in enumerate(newposts):
|
||||
userid = post['OwnerUserId']
|
||||
|
||||
# check first contribution
|
||||
if firstcontrib[userid] + timedelta(days=DAYS_NEW_USER) < post['CreationDate']:
|
||||
continue
|
||||
|
||||
sortedposts[userid].append(post)
|
||||
rprint("sorting posts ... took " + str(cms() - start) + "ms")
|
||||
|
||||
for option_posts in postcounts:
|
||||
# print(option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y") + " - #posts: " + str(option_posts))
|
||||
@@ -64,22 +77,8 @@ def main(folder, intervl):
|
||||
start = cms()
|
||||
printnoln("computing toxic levels: filtering")
|
||||
toxlevels = []
|
||||
searchedposts = defaultdict(int)
|
||||
filteredposts = []
|
||||
for (i, post) in enumerate(newposts):
|
||||
userid = post['OwnerUserId']
|
||||
|
||||
# check first contribution
|
||||
if firstcontrib[userid] + timedelta(days=DAYS_NEW_USER) < post['CreationDate']:
|
||||
continue
|
||||
|
||||
# no more than option_posts posts from one user
|
||||
searchedposts[userid] += 1
|
||||
if searchedposts[userid] > option_posts:
|
||||
continue
|
||||
|
||||
filteredposts.append(post)
|
||||
|
||||
filteredposts = [posts for (_, posts) in sortedposts.items() if len(posts) == option_posts]
|
||||
filteredposts = [p for posts in filteredposts for p in posts]
|
||||
for (i, post) in enumerate(filteredposts):
|
||||
printnoln("\rcomputing toxic levels: post " + str(i + 1) + "/" + str(len(filteredposts)))
|
||||
for a in post['Answers']:
|
||||
@@ -87,6 +86,7 @@ def main(folder, intervl):
|
||||
toxlevel = cachedsentiments[a['Id']]
|
||||
else:
|
||||
print("Sentiment not found for " + a['Id'])
|
||||
continue
|
||||
toxlevels.append(toxlevel)
|
||||
printnoln("\rcomputing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ...")
|
||||
|
||||
@@ -206,10 +206,10 @@ def main(folder, intervl):
|
||||
rprint("computing toxic levels: post " + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... plotting ... saving ... took " + str(cms() - start) + "ms")
|
||||
magickold += " " + goutfilenameoldusers + ".png"
|
||||
|
||||
os.system(magickglobal + " " + outputdir + "batch_newusers.pdf")
|
||||
os.system(magickold + " " + outputdir + "batch_oldusers.pdf")
|
||||
os.system(magickglobal + " " + outputdir + "batch_newusers_i" + str(intervl) + ".pdf")
|
||||
os.system(magickold + " " + outputdir + "batch_oldusers_i" + str(intervl) + ".pdf")
|
||||
for (i, cmd) in magickpost.items():
|
||||
os.system(cmd + " " + outputdir + "batch_newusers_" + str(i) + ".pdf")
|
||||
os.system(cmd + " " + outputdir + "batch_newusers_i" + str(intervl) + "_" + str(i) + ".pdf")
|
||||
|
||||
|
||||
def dumptoxlevels(lvls, filename):
|
||||
|
||||
145
calctoxdiff.py
145
calctoxdiff.py
@@ -14,29 +14,29 @@ from common import imprt, IMAGE_MAGICK
|
||||
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
|
||||
|
||||
|
||||
def main(folder):
|
||||
def main(folder, intervl):
|
||||
outputdir = folder + "/output/ksbatch/"
|
||||
os.system("mkdir -p " + outputdir)
|
||||
srcfolder = folder + "/output/batch/"
|
||||
|
||||
onlyfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "newusers" in f]
|
||||
onlyfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "newusers" in f and "i" + str(intervl) in f]
|
||||
onlyfiles = sorted(onlyfiles)
|
||||
|
||||
plotbypost(onlyfiles, outputdir)
|
||||
plotbydate(onlyfiles, outputdir)
|
||||
plotbypost(onlyfiles, outputdir, intervl)
|
||||
plotbydate(onlyfiles, outputdir, intervl)
|
||||
|
||||
oldfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "oldusers" in f]
|
||||
oldfiles = [srcfolder + f for f in listdir(srcfolder) if isfile(join(srcfolder, f)) and f.endswith(".py") and "oldusers" in f and "i" + str(intervl) in f]
|
||||
oldfiles = sorted(oldfiles)
|
||||
|
||||
plotbydateold(onlyfiles, oldfiles, outputdir)
|
||||
plotbydateold(onlyfiles, oldfiles, outputdir, intervl)
|
||||
|
||||
|
||||
def plotbypost(onlyfiles, outputdir):
|
||||
def plotbypost(onlyfiles, outputdir, intervl):
|
||||
print("plotbypost")
|
||||
files = defaultdict(list)
|
||||
for f in onlyfiles:
|
||||
s = f[:-3].split("_")
|
||||
files[int(s[5])].append(f)
|
||||
files[int(s[6])].append(f)
|
||||
files = {p: sorted(l, key=lambda e: datetime.strptime(e.split("_")[3], "%d-%m-%Y")) for (p, l) in files.items()}
|
||||
|
||||
changes_neg = defaultdict(list)
|
||||
@@ -62,10 +62,10 @@ def plotbypost(onlyfiles, outputdir):
|
||||
poslevelsflat2 = [item['pos'] for item in tox2]
|
||||
comlevelsflat2 = [item['compound'] for item in tox2]
|
||||
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsflat2)
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsflat2)
|
||||
kspos = ks_2samp(poslevelsflat1, poslevelsflat2)
|
||||
kscom = ks_2samp(comlevelsflat1, comlevelsflat2)
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsflat2) if len(neglevelsflat1) > 0 and len(neglevelsflat2) > 0 else "no values"
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsflat2) if len(neulevelsflat1) > 0 and len(neulevelsflat2) > 0 else "no values"
|
||||
kspos = ks_2samp(poslevelsflat1, poslevelsflat2) if len(poslevelsflat1) > 0 and len(poslevelsflat2) > 0 else "no values"
|
||||
kscom = ks_2samp(comlevelsflat1, comlevelsflat2) if len(comlevelsflat1) > 0 and len(comlevelsflat2) > 0 else "no values"
|
||||
|
||||
changes_neg[p].append(ksneg)
|
||||
changes_neu[p].append(ksneu)
|
||||
@@ -73,7 +73,7 @@ def plotbypost(onlyfiles, outputdir):
|
||||
changes_com[p].append(kscom)
|
||||
|
||||
for (p, l) in files.items():
|
||||
with open(outputdir + "/ks_post_" + str(p) + ".log", "w") as f:
|
||||
with open(outputdir + "/ks_post_i" + str(intervl) + "_" + str(p) + ".log", "w") as f:
|
||||
for i in range(len(l) - 1):
|
||||
f1 = l[i]
|
||||
f2 = l[i + 1]
|
||||
@@ -85,11 +85,12 @@ def plotbypost(onlyfiles, outputdir):
|
||||
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[p], "neu": changes_neu[p], "pos": changes_pos[p], "com": changes_com[p]}.items():
|
||||
pval = [x.pvalue for x in changes]
|
||||
pval = [x.pvalue if not isinstance(x, str) else None for x in changes]
|
||||
pvalnotnull = [x for x in pval if x is not None]
|
||||
plt.plot(x, pval, label=type + ".pval", color=colors[type])
|
||||
mean = np.mean(pval)
|
||||
std = np.std(pval)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, pval) if s <= mean - std or s >= mean + std]
|
||||
mean = np.mean(pvalnotnull)
|
||||
std = np.std(pvalnotnull)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, pval) if s is not None and (s <= mean - std or s >= mean + std)]
|
||||
plt.plot(x, [mean] * len(pval), color=colors[type], ls='dashed')
|
||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||
plt.title("KS 2-sided test with max " + str(p) + " posts")
|
||||
@@ -97,10 +98,10 @@ def plotbypost(onlyfiles, outputdir):
|
||||
plt.xlabel("Comparision: time frame X - time frame X+1")
|
||||
plt.ylabel("p-value")
|
||||
plt.legend(loc="upper right")
|
||||
plt.savefig(outputdir + "/ks_post_pval_" + str(p) + ".png", bbox_inches='tight')
|
||||
plt.savefig(outputdir + "/ks_post_pval_i" + str(intervl) + "_" + str(p) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
magick += " " + outputdir + "/ks_post_pval_" + str(p) + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_post_pval.pdf")
|
||||
magick += " " + outputdir + "/ks_post_pval_i" + str(intervl) + "_" + str(p) + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_post_pval_i" + str(intervl) + ".pdf")
|
||||
|
||||
# stat
|
||||
magick = IMAGE_MAGICK
|
||||
@@ -108,11 +109,12 @@ def plotbypost(onlyfiles, outputdir):
|
||||
x = [l[i].split("_")[3] + " -\n" + l[i + 1].split("_")[3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[p], "neu": changes_neu[p], "pos": changes_pos[p], "com": changes_com[p]}.items():
|
||||
stat = [x.statistic for x in changes]
|
||||
stat = [x.statistic if not isinstance(x, str) else None for x in changes]
|
||||
statnotnull = [x for x in stat if x is not None]
|
||||
plt.plot(x, stat, label=type + ".stat", color=colors[type])
|
||||
mean = np.mean(stat)
|
||||
std = np.std(stat)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, stat) if s <= mean - std or s >= mean + std]
|
||||
mean = np.mean(statnotnull)
|
||||
std = np.std(statnotnull)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, stat) if s is not None and (s <= mean - std or s >= mean + std)]
|
||||
plt.plot(x, [mean] * len(stat), color=colors[type], ls='dashed')
|
||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||
plt.title("KS 2-sided test with max " + str(p) + " posts")
|
||||
@@ -120,13 +122,13 @@ def plotbypost(onlyfiles, outputdir):
|
||||
plt.xlabel("Comparision: time frame X - time frame X+1")
|
||||
plt.ylabel("stat value")
|
||||
plt.legend(loc="upper right")
|
||||
plt.savefig(outputdir + "/ks_post_stat_" + str(p) + ".png", bbox_inches='tight')
|
||||
plt.savefig(outputdir + "/ks_post_stat_i" + str(intervl) + "_" + str(p) + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
magick += " " + outputdir + "/ks_post_stat_" + str(p) + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_post_stat.pdf")
|
||||
magick += " " + outputdir + "/ks_post_stat_i" + str(intervl) + "_" + str(p) + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_post_stat_i" + str(intervl) + ".pdf")
|
||||
|
||||
|
||||
def plotbydate(onlyfiles, outputdir):
|
||||
def plotbydate(onlyfiles, outputdir, intervl):
|
||||
print("plotbydate")
|
||||
files = defaultdict(list)
|
||||
for f in onlyfiles:
|
||||
@@ -159,10 +161,10 @@ def plotbydate(onlyfiles, outputdir):
|
||||
poslevelsflat2 = [item['pos'] for item in tox2]
|
||||
comlevelsflat2 = [item['compound'] for item in tox2]
|
||||
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsflat2)
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsflat2)
|
||||
kspos = ks_2samp(poslevelsflat1, poslevelsflat2)
|
||||
kscom = ks_2samp(comlevelsflat1, comlevelsflat2)
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsflat2) if len(neglevelsflat1) > 0 and len(neglevelsflat2) > 0 else "no values"
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsflat2) if len(neulevelsflat1) > 0 and len(neulevelsflat2) > 0 else "no values"
|
||||
kspos = ks_2samp(poslevelsflat1, poslevelsflat2) if len(poslevelsflat1) > 0 and len(poslevelsflat2) > 0 else "no values"
|
||||
kscom = ks_2samp(comlevelsflat1, comlevelsflat2) if len(comlevelsflat1) > 0 and len(comlevelsflat2) > 0 else "no values"
|
||||
|
||||
changes_neg[d].append(ksneg)
|
||||
changes_neu[d].append(ksneu)
|
||||
@@ -170,7 +172,7 @@ def plotbydate(onlyfiles, outputdir):
|
||||
changes_com[d].append(kscom)
|
||||
|
||||
for (d, l) in files.items():
|
||||
with open(outputdir + "/ks_date_" + d[0] + "_" + d[1] + ".log", "w") as f:
|
||||
with open(outputdir + "/ks_date_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".log", "w") as f:
|
||||
for i in range(len(l) - 1):
|
||||
f1 = l[i]
|
||||
f2 = l[i + 1]
|
||||
@@ -180,14 +182,15 @@ def plotbydate(onlyfiles, outputdir):
|
||||
# pval
|
||||
magick = IMAGE_MAGICK
|
||||
for (d, l) in files.items():
|
||||
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
||||
x = [l[i].split("_")[6][:-3] + "-" + l[i + 1].split("_")[6][:-3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
|
||||
pval = [x.pvalue for x in changes]
|
||||
pval = [x.pvalue if not isinstance(x, str) else None for x in changes]
|
||||
pvalnotnull = [x for x in pval if x is not None]
|
||||
plt.plot(x, pval, label=type + ".pval", color=colors[type])
|
||||
mean = np.mean(pval)
|
||||
std = np.std(pval)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, pval) if s <= mean - std or s >= mean + std]
|
||||
mean = np.mean(pvalnotnull)
|
||||
std = np.std(pvalnotnull)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, pval) if s is not None and (s <= mean - std or s >= mean + std)]
|
||||
plt.plot(x, [mean] * len(pval), color=colors[type], ls='dashed')
|
||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
|
||||
@@ -195,22 +198,23 @@ def plotbydate(onlyfiles, outputdir):
|
||||
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
|
||||
plt.ylabel("p-value")
|
||||
plt.legend(loc="upper right")
|
||||
plt.savefig(outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
||||
plt.savefig(outputdir + "/ks_date_pval_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
magick += " " + outputdir + "/ks_date_pval_" + d[0] + "_" + d[1] + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_date_pval.pdf")
|
||||
magick += " " + outputdir + "/ks_date_pval_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_date_pval_i" + str(intervl) + ".pdf")
|
||||
|
||||
# stat
|
||||
magick = IMAGE_MAGICK
|
||||
for (d, l) in files.items():
|
||||
x = [l[i].split("_")[5][:-3] + "-" + l[i + 1].split("_")[5][:-3] for i in range(len(l) - 1)]
|
||||
x = [l[i].split("_")[6][:-3] + "-" + l[i + 1].split("_")[6][:-3] for i in range(len(l) - 1)]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
|
||||
stat = [x.statistic for x in changes]
|
||||
stat = [x.statistic if not isinstance(x, str) else None for x in changes]
|
||||
statnotnull = [x for x in stat if x is not None]
|
||||
plt.plot(x, stat, label=type + ".stat", color=colors[type])
|
||||
mean = np.mean(stat)
|
||||
std = np.std(stat)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, stat) if s <= mean - std or s >= mean + std]
|
||||
mean = np.mean(statnotnull)
|
||||
std = np.std(statnotnull)
|
||||
dev = [(xx, s) for (xx, s) in zip(x, stat) if s is not None and (s <= mean - std or s >= mean + std)]
|
||||
plt.plot(x, [mean] * len(stat), color=colors[type], ls='dashed')
|
||||
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
|
||||
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
|
||||
@@ -218,13 +222,13 @@ def plotbydate(onlyfiles, outputdir):
|
||||
plt.xlabel("Comparision: X (max) posts - X+1 (max) posts")
|
||||
plt.ylabel("stat value")
|
||||
plt.legend(loc="upper right")
|
||||
plt.savefig(outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
||||
plt.savefig(outputdir + "/ks_date_stat_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
magick += " " + outputdir + "/ks_date_stat_" + d[0] + "_" + d[1] + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_date_stat.pdf")
|
||||
magick += " " + outputdir + "/ks_date_stat_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".png"
|
||||
os.system(magick + " " + outputdir + "/ks_date_stat_i" + str(intervl) + ".pdf")
|
||||
|
||||
|
||||
def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||
def plotbydateold(onlyfiles, oldfiles, outputdir, intervl):
|
||||
print("plotbydateold")
|
||||
files = defaultdict(list)
|
||||
for f in onlyfiles:
|
||||
@@ -232,7 +236,7 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||
files[(s[3], s[4])].append(f)
|
||||
dates = sorted(files.keys(), key=lambda e: "-".join(reversed(e[0].split("-"))))
|
||||
files = {d: files[d] for d in dates}
|
||||
files = {d: sorted(l, key=lambda e: e.split("_")[5]) for (d, l) in files.items()}
|
||||
files = {d: sorted(l, key=lambda e: e.split("_")[6]) for (d, l) in files.items()}
|
||||
oldfiles = {(f[:-3].split("_")[3], f[:-3].split("_")[4]): f for f in oldfiles}
|
||||
|
||||
changes_neg = defaultdict(list)
|
||||
@@ -263,10 +267,10 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||
poslevelsflat1 = [item['pos'] for item in tox1]
|
||||
comlevelsflat1 = [item['compound'] for item in tox1]
|
||||
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsold)
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsold)
|
||||
kspos = ks_2samp(poslevelsflat1, poslevelsold)
|
||||
kscom = ks_2samp(comlevelsflat1, comlevelsold)
|
||||
ksneg = ks_2samp(neglevelsflat1, neglevelsold) if len(neglevelsflat1) > 0 and len(neglevelsold) > 0 else "no values"
|
||||
ksneu = ks_2samp(neulevelsflat1, neulevelsold) if len(neulevelsflat1) > 0 and len(neulevelsold) > 0 else "no values"
|
||||
kspos = ks_2samp(poslevelsflat1, poslevelsold) if len(poslevelsflat1) > 0 and len(poslevelsold) > 0 else "no values"
|
||||
kscom = ks_2samp(comlevelsflat1, comlevelsold) if len(comlevelsflat1) > 0 and len(comlevelsold) > 0 else "no values"
|
||||
|
||||
changes_neg[d].append(ksneg)
|
||||
changes_neu[d].append(ksneu)
|
||||
@@ -276,7 +280,7 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||
print("logs")
|
||||
for (d, l) in files.items():
|
||||
f1 = oldfiles[d]
|
||||
with open(outputdir + "/ks_olddate_" + d[0] + "_" + d[1] + ".log", "w") as f:
|
||||
with open(outputdir + "/ks_olddate_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".log", "w") as f:
|
||||
for i in range(len(l)):
|
||||
if changes_neg[d][i] is None:
|
||||
continue
|
||||
@@ -289,7 +293,7 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||
imgmagickcmd = IMAGE_MAGICK
|
||||
for (d, l) in files.items():
|
||||
print(d)
|
||||
x = [l[i][:-3].split("_")[5] for i in range(len(l))]
|
||||
x = [l[i][:-3].split("_")[6] for i in range(len(l))]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
|
||||
pval = [(xx, c.pvalue) for xx, c in zip(x, changes) if c is not None]
|
||||
@@ -306,18 +310,18 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||
plt.xlabel("Comparision: new users X (max) posts - old users posts")
|
||||
plt.ylabel("p-value")
|
||||
plt.legend(loc="upper right")
|
||||
outfile = outputdir + "/ks_olddate_pval_" + d[0] + "_" + d[1] + ".png"
|
||||
outfile = outputdir + "/ks_olddate_pval_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".png"
|
||||
plt.savefig(outfile, bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
imgmagickcmd += " " + outfile
|
||||
os.system(imgmagickcmd + " " + outputdir + "/ks_olddate_pval.pdf")
|
||||
os.system(imgmagickcmd + " " + outputdir + "/ks_olddate_pval_i" + str(intervl) + ".pdf")
|
||||
|
||||
# stat
|
||||
print("stat")
|
||||
imgmagickcmd = IMAGE_MAGICK
|
||||
for (d, l) in files.items():
|
||||
print(d)
|
||||
x = [l[i][:-3].split("_")[5] for i in range(len(l))]
|
||||
x = [l[i][:-3].split("_")[6] for i in range(len(l))]
|
||||
fig = plt.figure(figsize=(16, 12))
|
||||
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
|
||||
stat = [(xx, c.statistic) for xx, c in zip(x, changes) if c is not None]
|
||||
@@ -334,11 +338,11 @@ def plotbydateold(onlyfiles, oldfiles, outputdir):
|
||||
plt.xlabel("Comparision: new users X (max) posts - old users posts")
|
||||
plt.ylabel("stat value")
|
||||
plt.legend(loc="upper right")
|
||||
outfile = outputdir + "/ks_olddate_stat_" + d[0] + "_" + d[1] + ".png"
|
||||
outfile = outputdir + "/ks_olddate_stat_i" + str(intervl) + "_" + d[0] + "_" + d[1] + ".png"
|
||||
plt.savefig(outfile, bbox_inches='tight')
|
||||
plt.close(fig)
|
||||
imgmagickcmd += " " + outfile
|
||||
os.system(imgmagickcmd + " " + outputdir + "/ks_olddate_stat.pdf")
|
||||
os.system(imgmagickcmd + " " + outputdir + "/ks_olddate_stat_i" + str(intervl) + ".pdf")
|
||||
|
||||
|
||||
def filecmp(file1, file2):
|
||||
@@ -365,5 +369,20 @@ if __name__ == "__main__":
|
||||
if not os.path.isdir(folder):
|
||||
print(folder + " is not a folder")
|
||||
sys.exit(1)
|
||||
interval = 3
|
||||
if len(sys.argv) >= 3:
|
||||
if sys.argv[2].startswith("-i"):
|
||||
interval = sys.argv[2][2:]
|
||||
try:
|
||||
interval = int(interval)
|
||||
except ValueError:
|
||||
print("-i: int required")
|
||||
sys.exit(1)
|
||||
if interval < 1 or interval > 12:
|
||||
print("-i: only 1 - 12")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("unknown parameter: " + sys.argv[2])
|
||||
sys.exit(1)
|
||||
|
||||
main(folder)
|
||||
main(folder, interval)
|
||||
|
||||
Reference in New Issue
Block a user