wip
This commit is contained in:
@@ -13,6 +13,7 @@ import gc
|
|||||||
TAG_RE = r'<[^>]+>'
|
TAG_RE = r'<[^>]+>'
|
||||||
TAG_CODE = r'<code.+</code>'
|
TAG_CODE = r'<code.+</code>'
|
||||||
TAG_MATH = r'<span class="math-container".+</span>'
|
TAG_MATH = r'<span class="math-container".+</span>'
|
||||||
|
TAG_MATH_SHORT = r'\$.+\$'
|
||||||
|
|
||||||
printnoln = lambda text: print(text, end='', flush=True)
|
printnoln = lambda text: print(text, end='', flush=True)
|
||||||
rprint = lambda text: print('\r' + text)
|
rprint = lambda text: print('\r' + text)
|
||||||
@@ -290,6 +291,7 @@ def setprop(dic, key, value):
|
|||||||
|
|
||||||
def removetags(text):
|
def removetags(text):
|
||||||
return re.sub(TAG_RE, '',
|
return re.sub(TAG_RE, '',
|
||||||
|
re.sub(TAG_MATH_SHORT, '',
|
||||||
re.sub(TAG_MATH, '',
|
re.sub(TAG_MATH, '',
|
||||||
re.sub(TAG_CODE, '', text, flags=re.DOTALL), flags=re.DOTALL), flags=re.DOTALL)
|
re.sub(TAG_CODE, '', text, flags=re.DOTALL), flags=re.DOTALL), flags=re.DOTALL), flags=re.DOTALL)
|
||||||
# return TAG_RE.sub('', TAG_MATH.sub('', TAG_CODE.sub('', text)))
|
# return TAG_RE.sub('', TAG_MATH.sub('', TAG_CODE.sub('', text)))
|
||||||
|
|||||||
@@ -37,7 +37,9 @@ def main(folder, intervl):
|
|||||||
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
|
newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults()
|
||||||
questionsininterval.append(((option_date_from, option_date_to), len(newposts)))
|
questionsininterval.append(((option_date_from, option_date_to), len(newposts)))
|
||||||
newanswers = dmt(posts).map(lambda p: [a for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to], "filtering answers by date") \
|
newanswers = dmt(posts).map(lambda p: [a for a in p['Answers'] if option_date_from <= a['CreationDate'] < option_date_to], "filtering answers by date") \
|
||||||
.reduce(lambda a, b: a + b, lambda a, b: a + b, lambda: []).getresults()
|
.filter(lambda a: a != [], "filter out empty answer list").getresults()
|
||||||
|
print('collecting answers')
|
||||||
|
newanswers = [e for l in newanswers for e in l]
|
||||||
answersininterval.append(((option_date_from, option_date_to), len(newanswers)))
|
answersininterval.append(((option_date_from, option_date_to), len(newanswers)))
|
||||||
|
|
||||||
postcounts = defaultdict(list)
|
postcounts = defaultdict(list)
|
||||||
@@ -153,7 +155,7 @@ def main(folder, intervl):
|
|||||||
fig.savefig(outputdir + "postsanswers-i" + str(intervl) + ".png", bbox_inches='tight')
|
fig.savefig(outputdir + "postsanswers-i" + str(intervl) + ".png", bbox_inches='tight')
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
#print data set stats
|
# print data set stats
|
||||||
stats = ""
|
stats = ""
|
||||||
stats += "users: " + str(len(users)) + "\n"
|
stats += "users: " + str(len(users)) + "\n"
|
||||||
stats += "questions: " + str(len(posts)) + "\n"
|
stats += "questions: " + str(len(posts)) + "\n"
|
||||||
@@ -169,6 +171,7 @@ def main(folder, intervl):
|
|||||||
with open(outputdir + "/stats.txt", "w") as file:
|
with open(outputdir + "/stats.txt", "w") as file:
|
||||||
file.write(stats)
|
file.write(stats)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# execute only if run as a script
|
# execute only if run as a script
|
||||||
usage = sys.argv[0] + " <folder>"
|
usage = sys.argv[0] + " <folder>"
|
||||||
|
|||||||
Reference in New Issue
Block a user