wip
This commit is contained in:
@@ -13,6 +13,7 @@ import gc
|
||||
# Matches a single markup tag: "<", one or more characters other than ">", then ">".
TAG_RE = r'<[^>]+>'
|
||||
# Matches one <code ...>...</code> element including its content. The quantifier
# is lazy so each element is matched on its own; a greedy ".+" (with re.DOTALL)
# would run from the first "<code" to the last "</code>" and also delete the
# ordinary text lying between separate code elements.
TAG_CODE = r'<code.+?</code>'
|
||||
# Matches one <span class="math-container" ...>...</span> element including its
# content. Lazy quantifier: a greedy ".+" (with re.DOTALL) would span from the
# first math container to the last "</span>" and drop the text in between.
TAG_MATH = r'<span class="math-container".+?</span>'
|
||||
# Matches one dollar-delimited formula: either $$...$$ or $...$. The "$$" form is
# tried first so a display formula is removed as a whole, and both quantifiers
# are lazy so text between two separate formulas is not swallowed (a greedy
# ".+" would match from the first "$" to the last one).
TAG_MATH_SHORT = r'\$\$.+?\$\$|\$.+?\$'
|
||||
|
||||
def printnoln(text):
    """Print ``text`` without a trailing newline, flushing the output stream at once."""
    print(text, end='', flush=True)
|
||||
def rprint(text):
    """Print ``text`` preceded by a carriage return and followed by the usual newline.

    ``text`` must be a ``str``: it is concatenated to the carriage return.
    """
    print('\r' + text)
|
||||
@@ -290,6 +291,7 @@ def setprop(dic, key, value):
|
||||
|
||||
def removetags(text):
    """Return ``text`` with code elements, math and markup tags removed.

    Four substitutions are applied in order, each with ``re.DOTALL`` so that
    "." also matches newlines:

    1. ``TAG_CODE``       - code elements, content included
    2. ``TAG_MATH``       - math-container spans, content included
    3. ``TAG_MATH_SHORT`` - dollar-delimited math
    4. ``TAG_RE``         - every remaining tag (the text between tags is kept)

    The content-removing patterns run before ``TAG_RE``; otherwise the tags
    would already be stripped and their content would survive as plain text.
    """
    for pattern in (TAG_CODE, TAG_MATH, TAG_MATH_SHORT, TAG_RE):
        text = re.sub(pattern, '', text, flags=re.DOTALL)
    return text
|
||||
|
||||
@@ -37,7 +37,9 @@ def main(folder, intervl):
|
||||
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
|
||||
questionsininterval.append(((option_date_from, option_date_to), len(newposts)))
|
||||
newanswers = dmt(posts).map(lambda p: [a for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to], "filtering answers by date") \
|
||||
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []).getresults()
|
||||
.filter(lambda a: a != [], "filter out empty answer list").getresults()
|
||||
print('collecting answers')
|
||||
newanswers = [e for l in newanswers for e in l]
|
||||
answersininterval.append(((option_date_from, option_date_to), len(newanswers)))
|
||||
|
||||
postcounts = defaultdict(list)
|
||||
@@ -169,6 +171,7 @@ def main(folder, intervl):
|
||||
with open(outputdir + "/stats.txt", "w") as file:
|
||||
file.write(stats)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# execute only if run as a script
|
||||
usage = sys.argv[0] + " <folder>"
|
||||
|
||||
Reference in New Issue
Block a user