wip
This commit is contained in:
67
loader.py
67
loader.py
@@ -72,11 +72,10 @@ def readVotes(folder):
|
|||||||
print(prefix + "done")
|
print(prefix + "done")
|
||||||
return votes
|
return votes
|
||||||
|
|
||||||
|
VOTE_TAGS = ['PostId', 'VoteTypeId', 'CreationDate']
|
||||||
def mapvote(item):
|
def mapvote(item):
|
||||||
tags = ['PostId', 'VoteTypeId', 'CreationDate']
|
|
||||||
datetags = ['CreationDate']
|
datetags = ['CreationDate']
|
||||||
vote = {tag: getTag(item, tag) for tag in tags}
|
vote = {tag: getTag(item, tag) for tag in VOTE_TAGS}
|
||||||
for tag in datetags:
|
for tag in datetags:
|
||||||
if vote[tag] is not None:
|
if vote[tag] is not None:
|
||||||
vote[tag] = datetime.fromisoformat(vote[tag])
|
vote[tag] = datetime.fromisoformat(vote[tag])
|
||||||
@@ -113,47 +112,45 @@ def computefirstcontrib(posts):
|
|||||||
firstcontrib = {id: min(ldate) for (id, ldate) in firstcontrib.items()}
|
firstcontrib = {id: min(ldate) for (id, ldate) in firstcontrib.items()}
|
||||||
return firstcontrib
|
return firstcontrib
|
||||||
|
|
||||||
|
USER_TAGS = ['Id', 'CreationDate']
|
||||||
|
USER_DTAGS = ['CreationDate']
|
||||||
def mapuser(item):
|
def mapuser(item):
|
||||||
tags = ['Id', 'CreationDate']
|
user = {tag: getTag(item, tag) for tag in USER_TAGS}
|
||||||
datetags = ['CreationDate']
|
for tag in USER_DTAGS:
|
||||||
user = {tag: getTag(item, tag) for tag in tags}
|
|
||||||
for tag in datetags:
|
|
||||||
if user[tag] is not None:
|
if user[tag] is not None:
|
||||||
user[tag] = datetime.fromisoformat(user[tag])
|
user[tag] = datetime.fromisoformat(user[tag])
|
||||||
else:
|
else:
|
||||||
print("map user: tag " + tag + " is None: " + str(user))
|
print("map user: tag " + tag + " is None: " + str(user))
|
||||||
return user
|
return user
|
||||||
|
|
||||||
|
Q_TAGS = ['Id', 'CreationDate', 'Body', 'Title', 'OwnerUserId', 'OwnerDisplayName', 'Score']
|
||||||
|
Q_DTAGS = ['CreationDate']
|
||||||
|
Q_BODY = 'Body'
|
||||||
def mapQuestion(item):
|
def mapQuestion(item):
|
||||||
tags = ['Id', 'CreationDate', 'Body', 'Title', 'OwnerUserId', 'OwnerDisplayName', 'Score']
|
question = {tag: getTag(item, tag) for tag in Q_TAGS}
|
||||||
datetags = ['CreationDate']
|
for tag in Q_DTAGS:
|
||||||
question = {tag: getTag(item, tag) for tag in tags}
|
|
||||||
for tag in datetags:
|
|
||||||
question[tag] = datetime.fromisoformat(question[tag])
|
question[tag] = datetime.fromisoformat(question[tag])
|
||||||
question['Body'] = removetags(html.unescape(question['Body']))
|
question[Q_BODY] = removetags(html.unescape(question[Q_BODY]))
|
||||||
return question
|
return question
|
||||||
|
|
||||||
|
A_TAGS = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId', 'Score']
|
||||||
|
A_DTAGS = ['CreationDate']
|
||||||
def mapAnswer(item):
|
def mapAnswer(item):
|
||||||
tags = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId', 'Score']
|
answer = {tag: getTag(item, tag) for tag in A_TAGS}
|
||||||
datetags = ['CreationDate']
|
for tag in A_DTAGS:
|
||||||
answer = {tag: getTag(item, tag) for tag in tags}
|
|
||||||
for tag in datetags:
|
|
||||||
answer[tag] = datetime.fromisoformat(answer[tag])
|
answer[tag] = datetime.fromisoformat(answer[tag])
|
||||||
answer['Body'] = removetags(html.unescape(answer['Body']))
|
answer['Body'] = removetags(html.unescape(answer['Body']))
|
||||||
return answer
|
return answer
|
||||||
|
|
||||||
|
|
||||||
def mapComment(item):
|
# def mapComment(item):
|
||||||
tags = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId']
|
# tags = ['Id', 'ParentId', 'CreationDate', 'Body', 'OwnerUserId']
|
||||||
datetags = ['CreationDate']
|
# datetags = ['CreationDate']
|
||||||
comment = {tag: getTag(item, tag) for tag in tags}
|
# comment = {tag: getTag(item, tag) for tag in tags}
|
||||||
for tag in datetags:
|
# for tag in datetags:
|
||||||
comment[tag] = datetime.fromisoformat(comment[tag])
|
# comment[tag] = datetime.fromisoformat(comment[tag])
|
||||||
comment['Body'] = removetags(html.unescape(comment['Body']))
|
# comment['Body'] = removetags(html.unescape(comment['Body']))
|
||||||
return comment
|
# return comment
|
||||||
|
|
||||||
|
|
||||||
def readUsers(file):
|
def readUsers(file):
|
||||||
@@ -240,14 +237,14 @@ def readAnswers(items):
|
|||||||
return answers
|
return answers
|
||||||
|
|
||||||
|
|
||||||
def readComments(items):
|
# def readComments(items):
|
||||||
prefix = "readComments: "
|
# prefix = "readComments: "
|
||||||
comments = dmt(items).filter(lambda item: getTag(item, 'PostTypeId') == "3", prefix + "filter out comments") \
|
# comments = dmt(items).filter(lambda item: getTag(item, 'PostTypeId') == "3", prefix + "filter out comments") \
|
||||||
.map(mapComment, prefix + "mapping comments") \
|
# .map(mapComment, prefix + "mapping comments") \
|
||||||
.filter(lambda c: c['OwnerUserId'] is not None, prefix + "filter out broken comments").getresults()
|
# .filter(lambda c: c['OwnerUserId'] is not None, prefix + "filter out broken comments").getresults()
|
||||||
|
#
|
||||||
print(prefix + "comments read: " + str(len(comments)))
|
# print(prefix + "comments read: " + str(len(comments)))
|
||||||
return comments
|
# return comments
|
||||||
|
|
||||||
|
|
||||||
def getTag(item, tag):
|
def getTag(item, tag):
|
||||||
|
|||||||
Reference in New Issue
Block a user