This commit is contained in:
wea_ondara
2020-01-27 12:17:42 +01:00
parent da8896eadd
commit c2695e0d49

View File

@@ -72,11 +72,10 @@ def readVotes(folder):
print(prefix + "done")
return votes
VOTE_TAGS = ['PostId', 'VoteTypeId', 'CreationDate']
def mapvote(item):
tags = ['PostId', 'VoteTypeId', 'CreationDate']
datetags = ['CreationDate']
vote = {tag: getTag(item, tag) for tag in tags}
vote = {tag: getTag(item, tag) for tag in VOTE_TAGS}
for tag in datetags:
if vote[tag] is not None:
vote[tag] = datetime.fromisoformat(vote[tag])
@@ -113,47 +112,45 @@ def computefirstcontrib(posts):
firstcontrib = {id: min(ldate) for (id, ldate) in firstcontrib.items()}
return firstcontrib
# Attributes copied from each user item, and the subset parsed as dates.
USER_TAGS = ['Id', 'CreationDate']
USER_DTAGS = ['CreationDate']


def mapuser(item):
    """Build a user dict from a single item.

    Every attribute named in USER_TAGS is read with getTag(). Attributes
    named in USER_DTAGS are converted from ISO-format strings to datetime
    objects. A date attribute whose value is None is left as None and a
    diagnostic line is printed instead of raising.

    :param item: the raw item handed to getTag()
    :return: dict keyed by the names in USER_TAGS
    """
    user = {tag: getTag(item, tag) for tag in USER_TAGS}
    for tag in USER_DTAGS:
        if user[tag] is not None:
            user[tag] = datetime.fromisoformat(user[tag])
        else:
            # Keep the record but make the missing date visible in the log.
            print("map user: tag " + tag + " is None: " + str(user))
    return user
# Attributes copied from each question item, the subset parsed as dates,
# and the attribute holding the HTML body text.
Q_TAGS = ['Id', 'CreationDate', 'Body', 'Title', 'OwnerUserId', 'OwnerDisplayName', 'Score']
Q_DTAGS = ['CreationDate']
Q_BODY = 'Body'


def mapQuestion(item):
    """Build a question dict from a single item.

    Every attribute named in Q_TAGS is read with getTag(). Attributes named
    in Q_DTAGS are converted from ISO-format strings to datetime objects,
    and the body is HTML-unescaped and then passed through removetags().

    Unlike mapuser(), no None check is made here: a None date or body is
    passed straight to datetime.fromisoformat() / html.unescape().
    NOTE(review): presumably both are always present on questions - confirm.

    :param item: the raw item handed to getTag()
    :return: dict keyed by the names in Q_TAGS
    """
    question = {tag: getTag(item, tag) for tag in Q_TAGS}
    for tag in Q_DTAGS:
        question[tag] = datetime.fromisoformat(question[tag])
    # Unescape first so that escaped markup is also seen by removetags().
    question[Q_BODY] = removetags(html.unescape(question[Q_BODY]))
    return question
# Attributes copied from each answer item, and the subset parsed as dates.
A_TAGS = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId', 'Score']
A_DTAGS = ['CreationDate']


def mapAnswer(item):
    """Build an answer dict from a single item.

    Every attribute named in A_TAGS is read with getTag(). Attributes named
    in A_DTAGS are converted from ISO-format strings to datetime objects,
    and 'Body' is HTML-unescaped and then passed through removetags().

    No None check is made: a None date or body is passed straight to
    datetime.fromisoformat() / html.unescape().
    NOTE(review): presumably both are always present on answers - confirm.

    :param item: the raw item handed to getTag()
    :return: dict keyed by the names in A_TAGS
    """
    answer = {tag: getTag(item, tag) for tag in A_TAGS}
    for tag in A_DTAGS:
        answer[tag] = datetime.fromisoformat(answer[tag])
    # Unescape first so that escaped markup is also seen by removetags().
    answer['Body'] = removetags(html.unescape(answer['Body']))
    return answer
# Attributes copied from each comment item, and the subset parsed as dates.
# Hoisted to module level so the lists are not rebuilt on every call,
# matching the USER_TAGS / Q_TAGS / A_TAGS convention used by the other mappers.
C_TAGS = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId']
C_DTAGS = ['CreationDate']


def mapComment(item):
    """Build a comment dict from a single item.

    Every attribute named in C_TAGS is read with getTag(). Attributes named
    in C_DTAGS are converted from ISO-format strings to datetime objects,
    and 'Body' is HTML-unescaped and then passed through removetags().

    No None check is made: a None date or body is passed straight to
    datetime.fromisoformat() / html.unescape().

    :param item: the raw item handed to getTag()
    :return: dict keyed by the names in C_TAGS
    """
    comment = {tag: getTag(item, tag) for tag in C_TAGS}
    for tag in C_DTAGS:
        comment[tag] = datetime.fromisoformat(comment[tag])
    # Unescape first so that escaped markup is also seen by removetags().
    comment['Body'] = removetags(html.unescape(comment['Body']))
    return comment
# def mapComment(item):
# tags = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId']
# datetags = ['CreationDate']
# comment = {tag: getTag(item, tag) for tag in tags}
# for tag in datetags:
# comment[tag] = datetime.fromisoformat(comment[tag])
# comment['Body'] = removetags(html.unescape(comment['Body']))
# return comment
def readUsers(file):
@@ -240,14 +237,14 @@ def readAnswers(items):
return answers
def readComments(items):
    """Extract the comment entries from items.

    Runs a dmt pipeline over items that keeps the entries whose 'PostTypeId'
    attribute equals "3", converts each with mapComment(), and then drops
    the ones whose 'OwnerUserId' is None. The number of remaining comments
    is printed before they are returned.

    :param items: raw items handed to dmt(); each is read through getTag()
    :return: whatever the pipeline's getresults() yields; it must support
        len() (presumably a list of comment dicts - confirm against dmt)
    """
    prefix = "readComments: "
    comments = dmt(items).filter(lambda item: getTag(item, 'PostTypeId') == "3", prefix + "filter out comments") \
        .map(mapComment, prefix + "mapping comments") \
        .filter(lambda c: c['OwnerUserId'] is not None, prefix + "filter out broken comments").getresults()

    print(prefix + "comments read: " + str(len(comments)))
    return comments
# def readComments(items):
# prefix = "readComments: "
# comments = dmt(items).filter(lambda item: getTag(item, 'PostTypeId') == "3", prefix + "filter out comments") \
# .map(mapComment, prefix + "mapping comments") \
# .filter(lambda c: c['OwnerUserId'] is not None, prefix + "filter out broken comments").getresults()
#
# print(prefix + "comments read: " + str(len(comments)))
# return comments
def getTag(item, tag):