This commit is contained in:
wea_ondara
2020-01-28 11:33:50 +01:00
parent c2695e0d49
commit 03c86683fb
7 changed files with 47 additions and 23 deletions

View File

@@ -72,15 +72,21 @@ def readVotes(folder):
print(prefix + "done")
return votes
# Attribute names that mapvote reads from every vote item via getTag.
VOTE_TAGS = [
    'PostId',
    'VoteTypeId',
    'CreationDate',
]
# Subset of VOTE_TAGS that mapvote parses with datetime.fromisoformat.
VOTE_DTAGS = ['CreationDate']
# Subset of VOTE_TAGS that mapvote converts with int().
VOTE_ITAGS = [
    'PostId',
    'VoteTypeId',
]
def mapvote(item):
    """Build a vote dict from one vote item.

    Every attribute named in VOTE_TAGS is read from ``item`` with getTag.
    The VOTE_DTAGS entries are then parsed into datetime objects and the
    VOTE_ITAGS entries are converted to int.

    :param item: object handed to getTag (presumably one row of the votes
        dump -- confirm against the caller)
    :return: dict keyed by the names in VOTE_TAGS
    """
    vote = {tag: getTag(item, tag) for tag in VOTE_TAGS}
    for tag in VOTE_DTAGS:
        if vote[tag] is not None:
            vote[tag] = datetime.fromisoformat(vote[tag])
        else:
            # A missing date is reported but does not abort the mapping.
            print("map vote: tag " + tag + " is None: " + str(vote))
    for tag in VOTE_ITAGS:
        # Leave a missing attribute as None rather than letting int(None)
        # raise, the same way mapQuestion and mapAnswer treat their int tags.
        if vote[tag] is not None:
            vote[tag] = int(vote[tag])
    return vote
@@ -112,8 +118,12 @@ def computefirstcontrib(posts):
firstcontrib = {id: min(ldate) for (id, ldate) in firstcontrib.items()}
return firstcontrib
# Attribute names that mapuser reads from every user item via getTag.
USER_TAGS = [
    'Id',
    'CreationDate',
]
# Subset of USER_TAGS that mapuser treats as dates.
USER_DTAGS = ['CreationDate']
# Subset of USER_TAGS that mapuser converts with int().
USER_ITAGS = ['Id']
def mapuser(item):
user = {tag: getTag(item, tag) for tag in USER_TAGS}
for tag in USER_DTAGS:
@@ -121,24 +131,38 @@ def mapuser(item):
user[tag] = datetime.fromisoformat(user[tag])
else:
print("map user: tag " + tag + " is None: " + str(user))
for tag in USER_ITAGS:
user[tag] = int(user[tag])
return user
# Attribute names that mapQuestion reads from every question item via getTag.
Q_TAGS = [
    'Id',
    'CreationDate',
    'Body',
    'Title',
    'OwnerUserId',
    'OwnerDisplayName',
    'Score',
]
# Subset of Q_TAGS that mapQuestion parses with datetime.fromisoformat.
Q_DTAGS = ['CreationDate']
# Subset of Q_TAGS that mapQuestion converts with int() when not None.
Q_ITAGS = [
    'Id',
    'OwnerUserId',
    'Score',
]
# Key whose value mapQuestion passes through html.unescape and removetags.
Q_BODY = 'Body'
def mapQuestion(item):
    """Build a question dict from one question item.

    Reads the Q_TAGS attributes with getTag, parses the Q_DTAGS entries with
    datetime.fromisoformat, converts the non-None Q_ITAGS entries to int and
    replaces the Q_BODY entry with its HTML-unescaped, removetags-processed
    form.

    :param item: object handed to getTag
    :return: dict keyed by the names in Q_TAGS
    """
    record = {}
    for name in Q_TAGS:
        record[name] = getTag(item, name)

    for name in Q_DTAGS:
        record[name] = datetime.fromisoformat(record[name])

    for name in Q_ITAGS:
        raw = record[name]
        # None entries are kept as None; everything else becomes an int.
        if raw is not None:
            record[name] = int(raw)

    unescaped = html.unescape(record[Q_BODY])
    record[Q_BODY] = removetags(unescaped)
    return record
# Attribute names that mapAnswer reads from every answer item via getTag.
A_TAGS = [
    'Id',
    'ParentId',
    'CreationDate',
    'Body',
    'OwnerUserId',
    'Score',
]
# Subset of A_TAGS that mapAnswer parses with datetime.fromisoformat.
A_DTAGS = ['CreationDate']
# Subset of A_TAGS that mapAnswer converts with int() when not None.
A_ITAGS = [
    'Id',
    'ParentId',
    'OwnerUserId',
    'Score',
]
def mapAnswer(item):
    """Build an answer dict from one answer item.

    Reads the A_TAGS attributes with getTag, parses the A_DTAGS entries with
    datetime.fromisoformat, converts the non-None A_ITAGS entries to int and
    replaces the 'Body' entry with its HTML-unescaped, removetags-processed
    form.

    :param item: object handed to getTag
    :return: dict keyed by the names in A_TAGS
    """
    fields = {key: getTag(item, key) for key in A_TAGS}

    for key in A_DTAGS:
        parsed = datetime.fromisoformat(fields[key])
        fields[key] = parsed

    for key in A_ITAGS:
        value = fields[key]
        if value is None:
            # Missing values stay None.
            continue
        fields[key] = int(value)

    decoded = html.unescape(fields['Body'])
    fields['Body'] = removetags(decoded)
    return fields