wip
This commit is contained in:
67
loader.py
67
loader.py
@@ -72,11 +72,10 @@ def readVotes(folder):
|
||||
print(prefix + "done")
|
||||
return votes
|
||||
|
||||
|
||||
VOTE_TAGS = ['PostId', 'VoteTypeId', 'CreationDate']
|
||||
def mapvote(item):
|
||||
tags = ['PostId', 'VoteTypeId', 'CreationDate']
|
||||
datetags = ['CreationDate']
|
||||
vote = {tag: getTag(item, tag) for tag in tags}
|
||||
vote = {tag: getTag(item, tag) for tag in VOTE_TAGS}
|
||||
for tag in datetags:
|
||||
if vote[tag] is not None:
|
||||
vote[tag] = datetime.fromisoformat(vote[tag])
|
||||
@@ -113,47 +112,45 @@ def computefirstcontrib(posts):
|
||||
firstcontrib = {id: min(ldate) for (id, ldate) in firstcontrib.items()}
|
||||
return firstcontrib
|
||||
|
||||
|
||||
# Attributes copied from a user item, and the subset holding date strings.
USER_TAGS = ['Id', 'CreationDate']
USER_DTAGS = ['CreationDate']


def mapuser(item):
    """Build a plain dict describing one user item.

    Each attribute named in USER_TAGS is read with getTag(). Attributes
    named in USER_DTAGS are then converted from ISO-format text to
    ``datetime`` objects.

    A date attribute whose value is None is left as None and the partially
    built dict is printed, so such an item does not abort the whole load.

    :param item: one parsed user item, in whatever form getTag() accepts.
    :return: dict keyed by the names in USER_TAGS.
    """
    user = {tag: getTag(item, tag) for tag in USER_TAGS}
    for tag in USER_DTAGS:
        if user[tag] is not None:
            user[tag] = datetime.fromisoformat(user[tag])
        else:
            # Report instead of raising: fromisoformat() rejects None.
            print("map user: tag " + tag + " is None: " + str(user))
    return user
|
||||
|
||||
|
||||
# Attributes copied from a question item, the subset holding date strings,
# and the attribute whose text is cleaned up before being returned.
Q_TAGS = [
    'Id',
    'CreationDate',
    'Body',
    'Title',
    'OwnerUserId',
    'OwnerDisplayName',
    'Score',
]
Q_DTAGS = ['CreationDate']
Q_BODY = 'Body'


def mapQuestion(item):
    """Build a plain dict describing one question item.

    Each attribute named in Q_TAGS is read with getTag(). Attributes named
    in Q_DTAGS are converted from ISO-format text to ``datetime`` objects,
    and the Q_BODY attribute is HTML-unescaped and then passed through
    removetags().

    NOTE(review): unlike mapuser(), there is no None check here, so an item
    lacking a date or body value raises TypeError from fromisoformat() /
    html.unescape(). Presumably callers filter such items out beforehand;
    confirm against the read* functions.

    :param item: one parsed question item, in whatever form getTag() accepts.
    :return: dict keyed by the names in Q_TAGS.
    """
    question = {tag: getTag(item, tag) for tag in Q_TAGS}
    for tag in Q_DTAGS:
        question[tag] = datetime.fromisoformat(question[tag])
    # Unescape entities first so removetags() sees real markup characters.
    question[Q_BODY] = removetags(html.unescape(question[Q_BODY]))
    return question
|
||||
|
||||
|
||||
# Attributes copied from an answer item, the subset holding date strings,
# and the attribute whose text is cleaned up before being returned.
A_TAGS = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId', 'Score']
A_DTAGS = ['CreationDate']
A_BODY = 'Body'


def mapAnswer(item):
    """Build a plain dict describing one answer item.

    Each attribute named in A_TAGS is read with getTag(). Attributes named
    in A_DTAGS are converted from ISO-format text to ``datetime`` objects,
    and the A_BODY attribute is HTML-unescaped and then passed through
    removetags().

    NOTE(review): there is no None check here, so an item lacking a date or
    body value raises TypeError from fromisoformat() / html.unescape().

    :param item: one parsed answer item, in whatever form getTag() accepts.
    :return: dict keyed by the names in A_TAGS.
    """
    answer = {tag: getTag(item, tag) for tag in A_TAGS}
    for tag in A_DTAGS:
        answer[tag] = datetime.fromisoformat(answer[tag])
    # Unescape entities first so removetags() sees real markup characters.
    answer[A_BODY] = removetags(html.unescape(answer[A_BODY]))
    return answer
|
||||
|
||||
|
||||
# Attributes copied from a comment item, the subset holding date strings,
# and the attribute whose text is cleaned up before being returned.
C_TAGS = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId']
C_DTAGS = ['CreationDate']
C_BODY = 'Body'


def mapComment(item):
    """Build a plain dict describing one comment item.

    Each attribute named in C_TAGS is read with getTag(). Attributes named
    in C_DTAGS are converted from ISO-format text to ``datetime`` objects,
    and the C_BODY attribute is HTML-unescaped and then passed through
    removetags().

    NOTE(review): there is no None check here, so an item lacking a date or
    body value raises TypeError from fromisoformat() / html.unescape().

    :param item: one parsed comment item, in whatever form getTag() accepts.
    :return: dict keyed by the names in C_TAGS.
    """
    comment = {tag: getTag(item, tag) for tag in C_TAGS}
    for tag in C_DTAGS:
        comment[tag] = datetime.fromisoformat(comment[tag])
    # Unescape entities first so removetags() sees real markup characters.
    comment[C_BODY] = removetags(html.unescape(comment[C_BODY]))
    return comment
|
||||
# def mapComment(item):
|
||||
# tags = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId']
|
||||
# datetags = ['CreationDate']
|
||||
# comment = {tag: getTag(item, tag) for tag in tags}
|
||||
# for tag in datetags:
|
||||
# comment[tag] = datetime.fromisoformat(comment[tag])
|
||||
# comment['Body'] = removetags(html.unescape(comment['Body']))
|
||||
# return comment
|
||||
|
||||
|
||||
def readUsers(file):
|
||||
@@ -240,14 +237,14 @@ def readAnswers(items):
|
||||
return answers
|
||||
|
||||
|
||||
def readComments(items):
    """Extract comment dicts from a collection of parsed items.

    The items are wrapped in dmt and run through three steps: keep items
    whose 'PostTypeId' attribute equals "3", convert each with mapComment(),
    then drop results whose 'OwnerUserId' is None. The number of surviving
    comments is printed before they are returned.

    The second argument of every dmt step is a text label (presumably used
    by dmt for progress output; confirm in dmt).

    :param items: parsed items accepted by dmt().
    :return: whatever dmt's getresults() yields for the final step.
    """
    prefix = "readComments: "

    def is_comment(item):
        return getTag(item, 'PostTypeId') == "3"

    def has_owner(c):
        return c['OwnerUserId'] is not None

    selected = dmt(items).filter(is_comment, prefix + "filter out comments")
    mapped = selected.map(mapComment, prefix + "mapping comments")
    valid = mapped.filter(has_owner, prefix + "filter out broken comments")
    comments = valid.getresults()

    print(prefix + "comments read: " + str(len(comments)))
    return comments
|
||||
# def readComments(items):
|
||||
# prefix = "readComments: "
|
||||
# comments = dmt(items).filter(lambda item: getTag(item, 'PostTypeId') == "3", prefix + "filter out comments") \
|
||||
# .map(mapComment, prefix + "mapping comments") \
|
||||
# .filter(lambda c: c['OwnerUserId'] is not None, prefix + "filter out broken comments").getresults()
|
||||
#
|
||||
# print(prefix + "comments read: " + str(len(comments)))
|
||||
# return comments
|
||||
|
||||
|
||||
def getTag(item, tag):
|
||||
|
||||
Reference in New Issue
Block a user