From c47260c0bbb785df40dd2339295b923fbbdc6fa4 Mon Sep 17 00:00:00 2001 From: wea_ondara Date: Sun, 31 May 2020 19:50:12 +0200 Subject: [PATCH] wip --- loader.py | 4 +++- posthist.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/loader.py b/loader.py index 8f402ab..3aa517f 100644 --- a/loader.py +++ b/loader.py @@ -13,6 +13,7 @@ import gc TAG_RE = r'<[^>]+>' TAG_CODE = r'' TAG_MATH = r'' +TAG_MATH_SHORT = r'\$.+\$' printnoln = lambda text: print(text, end='', flush=True) rprint = lambda text: print('\r' + text) @@ -290,6 +291,7 @@ def setprop(dic, key, value): def removetags(text): return re.sub(TAG_RE, '', + re.sub(TAG_MATH_SHORT, '', re.sub(TAG_MATH, '', - re.sub(TAG_CODE, '', text, flags=re.DOTALL), flags=re.DOTALL), flags=re.DOTALL) + re.sub(TAG_CODE, '', text, flags=re.DOTALL), flags=re.DOTALL), flags=re.DOTALL), flags=re.DOTALL) # return TAG_RE.sub('', TAG_MATH.sub('', TAG_CODE.sub('', text))) diff --git a/posthist.py b/posthist.py index 472c624..d5fdba8 100644 --- a/posthist.py +++ b/posthist.py @@ -37,7 +37,9 @@ def main(folder, intervl): newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults() questionsininterval.append(((option_date_from, option_date_to), len(newposts))) newanswers = dmt(posts).map(lambda p: [a for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to], "filtering answers by date") \ - .reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []).getresults() + .filter(lambda a: a != [], "filter out empty answer list").getresults() + print('collecting answers') + newanswers = [e for l in newanswers for e in l] answersininterval.append(((option_date_from, option_date_to), len(newanswers))) postcounts = defaultdict(list) @@ -153,7 +155,7 @@ def main(folder, intervl): fig.savefig(outputdir + "postsanswers-i" + str(intervl) + ".png", bbox_inches='tight') plt.close(fig) - #print data set stats + # print data set stats stats = "" stats += "users: " + str(len(users)) + "\n" stats += "questions: " + str(len(posts)) + "\n" @@ -169,6 +171,7 @@ def main(folder, intervl): with open(outputdir + "/stats.txt", "w") as file: file.write(stats) + if __name__ == "__main__": # execute only if run as a script usage = sys.argv[0] + " "