wip
This commit is contained in:
13
loader.py
13
loader.py
@@ -1,5 +1,7 @@
|
||||
import html
|
||||
import multiprocessing
|
||||
import operator
|
||||
import re
|
||||
import time
|
||||
import xml.etree.cElementTree as et
|
||||
from collections import defaultdict
|
||||
@@ -7,11 +9,13 @@ from datetime import datetime
|
||||
|
||||
from mt import mt
|
||||
|
||||
# Pattern for tag-like spans: a '<', one or more characters that are not '>',
# then a closing '>'. Compiled once at import time so callers can reuse it.
TAG_RE = re.compile(r'<[^>]+>')
|
||||
|
||||
def printnoln(text):
    """Print *text* without a trailing newline and flush stdout immediately.

    Args:
        text: The value to print; it is passed straight to ``print``.
    """
    # end='' suppresses the newline; flush=True pushes the partial line out
    # right away instead of waiting for the stream buffer to fill.
    print(text, end='', flush=True)
|
||||
def rprint(text):
    """Print *text* prefixed with a carriage return, followed by a newline.

    Args:
        text: The string to print; it is concatenated onto ``'\\r'``, so it
            must be a ``str``.
    """
    print('\r' + text)
|
||||
|
||||
|
||||
def dmt(data, progressinterval=1000):
    """Call ``mt`` on *data* with this module's default settings.

    Forwards to ``mt(multiprocessing.cpu_count(), data, False,
    progressinterval)``. The meaning of each positional argument is defined
    by ``mt`` itself (presumably the first is a worker count -- confirm
    against the ``mt`` module).

    Args:
        data: The payload handed to ``mt`` as its second argument.
        progressinterval: Passed to ``mt`` as its fourth argument; defaults
            to 1000 so existing ``dmt(data)`` call sites keep working.

    Returns:
        Whatever ``mt`` returns.
    """
    return mt(multiprocessing.cpu_count(), data, False, progressinterval)
|
||||
|
||||
|
||||
def cms():
    """Return the current ``time.time()`` value in milliseconds, rounded to an int."""
    now_seconds = time.time()
    millis = round(now_seconds * 1000)
    return int(millis)
|
||||
@@ -75,6 +79,7 @@ def mapQuestion(item):
|
||||
question = {tag: getTag(item, tag) for tag in tags}
|
||||
for tag in datetags:
|
||||
question[tag] = datetime.fromisoformat(question[tag])
|
||||
question['Body'] = removetags(html.unescape(question['Body']))
|
||||
return question
|
||||
|
||||
|
||||
@@ -84,6 +89,7 @@ def mapAnswer(item):
|
||||
answer = {tag: getTag(item, tag) for tag in tags}
|
||||
for tag in datetags:
|
||||
answer[tag] = datetime.fromisoformat(answer[tag])
|
||||
answer['Body'] = removetags(html.unescape(answer['Body']))
|
||||
return answer
|
||||
|
||||
|
||||
@@ -93,6 +99,7 @@ def mapComment(item):
|
||||
comment = {tag: getTag(item, tag) for tag in tags}
|
||||
for tag in datetags:
|
||||
comment[tag] = datetime.fromisoformat(comment[tag])
|
||||
comment['Body'] = removetags(html.unescape(comment['Body']))
|
||||
return comment
|
||||
|
||||
|
||||
@@ -201,3 +208,7 @@ def tagExists(item, tag):
|
||||
def setprop(dic, key, value):
    """Assign ``dic[key] = value`` and hand back the same container.

    Returning *dic* lets the assignment be used inside an expression.

    Args:
        dic: Any object supporting item assignment; it is mutated in place.
        key: The key (or index) to assign.
        value: The value to store.

    Returns:
        The very same *dic* object that was passed in.
    """
    operator.setitem(dic, key, value)
    return dic
|
||||
|
||||
|
||||
def removetags(text):
    """Return *text* with every match of the module-level ``TAG_RE`` removed.

    Args:
        text: The string to clean.

    Returns:
        A new string in which each ``TAG_RE`` match is replaced by ``''``.
    """
    return re.sub(TAG_RE, '', text)
|
||||
|
||||
Reference in New Issue
Block a user