This commit is contained in:
wea_ondara
2019-12-28 13:14:49 +01:00
parent 88de1e9c7b
commit e54e4f2938
3 changed files with 75 additions and 37 deletions

View File

@@ -17,7 +17,10 @@ printnoln = lambda text: print(text, end='', flush=True)
def rprint(text):
    """Print *text* prefixed with a carriage return, then the usual newline.

    The leading ``'\\r'`` is what lets a message replace a partial line that
    was written earlier without a trailing newline (e.g. a "... done" message
    following a "..." progress message).
    """
    return print('\r' + text)
def dmt(data, progressinterval=1000):
    """Return ``mt(...)`` for *data*, passing the CPU count as the first argument.

    ``mt`` is defined outside this view; its first argument is presumably the
    number of workers and the literal ``False`` an option flag -- confirm
    against ``mt`` itself.

    NOTE(review): a second ``dmt`` definition follows directly after this one
    in this view and would shadow it if both were live; this looks like the
    pre-change side of a diff.
    """
    cpu_total = multiprocessing.cpu_count()
    return mt(cpu_total, data, False, progressinterval)
def dmt(data, progressinterval=5000):
    """Return ``mt(...)`` for *data*, passing ``1`` as the first argument.

    ``mt`` is defined outside this view; the first argument is presumably the
    number of workers (so this variant would run single-worker) and the
    literal ``False`` an option flag -- confirm against ``mt`` itself.
    """
    count = 1  # presumably the worker count; verify against mt
    return mt(count, data, False, progressinterval)
def dmp(data, progressinterval=5000):
    """Return ``mt(...)`` for *data* with the CPU count first and a trailing ``True``.

    Differs from ``dmt`` only in the arguments handed to ``mt``: the CPU count
    is passed first and an extra fifth positional ``True`` is appended.
    ``mt`` is defined outside this view; the trailing flag presumably selects
    a different execution mode (the name hints at processes) -- confirm
    against ``mt`` itself.
    """
    cpu_total = multiprocessing.cpu_count()
    return mt(cpu_total, data, False, progressinterval, True)
def cms():
    """Return the current ``time.time()`` value in milliseconds, rounded to an int."""
    millis = time.time() * 1000
    return int(round(millis))
@@ -39,16 +42,16 @@ def load(folder):
# Count contributions per user id across `posts` and return them as a
# defaultdict(int) keyed by the 'OwnerUserId' values that were seen.
# Each element of `posts` is indexed with 'OwnerUserId' and 'Answers' (and,
# in the x3 statement, 'Comments'); each answer is indexed the same way.
# NOTE(review): indentation is missing in this view, and the x3 statement and
# its accumulation loop appear twice -- once live and once commented out.
# This looks like both sides of a diff shown together; confirm which variant
# is intended before relying on comment counts.
def computesumcontrib(posts):
# x1: one owner id per question.
x1 = dmt(posts).map(lambda q: q['OwnerUserId'], "calc sum contrib q").getresults()
# x2: per question, the list of owner ids of its answers.
x2 = dmt(posts).map(lambda q: [a['OwnerUserId'] for a in q['Answers']], "calc sum contrib a").getresults()
# x3: per question, owner ids of comments on its answers followed by owner ids of comments on the question itself.
x3 = dmt(posts).map(lambda q: [c['OwnerUserId'] for a in q['Answers'] for c in a['Comments']] + [c['OwnerUserId'] for c in q['Comments']], "calc sum contrib c").getresults()
# x3 = dmt(posts).map(lambda q: [c['OwnerUserId'] for a in q['Answers'] for c in a['Comments']] + [c['OwnerUserId'] for c in q['Comments']], "calc sum contrib c").getresults()
# defaultdict(int) lets unseen ids start at 0 so they can be incremented directly.
sumcontrib = defaultdict(int)
# One contribution per question owner.
for id in x1:
sumcontrib[id] += 1
# One contribution per answer owner.
for y in x2:
for id in y:
sumcontrib[id] += 1
# One contribution per comment owner (see the review note at the top about the duplicated x3 lines).
for y in x3:
for id in y:
sumcontrib[id] += 1
# for y in x3:
# for id in y:
# sumcontrib[id] += 1
return sumcontrib
@@ -137,13 +140,13 @@ def readPosts(file):
answers = readAnswers(items)
answerids = set(dmt(answers).map(lambda a: a['Id'], prefix + "get answer ids").getresults())
comments = readComments(items)
# comments = readComments(items)
# filter answers
answers = dmt(answers).filter(lambda a: a['ParentId'] in questionids, prefix + "filter answers by a.id in q.id").getresults()
# filter comments
comments = dmt(comments).filter(lambda c: c['ParentId'] in questionids.union(answerids), prefix + "filter comments by c.id in q.id + a.id").getresults()
# comments = dmt(comments).filter(lambda c: c['ParentId'] in questionids.union(answerids), prefix + "filter comments by c.id in q.id + a.id").getresults()
# create question answer mapping
printnoln(prefix + "create qamapping ...")
@@ -153,14 +156,14 @@ def readPosts(file):
rprint(prefix + "create qamapping ... done")
questions = dmt(questions).map(lambda q: setprop(q, 'Answers', qamapping[q['Id']]), prefix + "assign answers to questions").getresults()
# create comment question comment answer mapping
printnoln(prefix + "create qacmapping ...")
qacmapping = {id: [] for id in questionids.union(answerids)}
for c in comments:
qacmapping[c['ParentId']].append(c)
rprint(prefix + "create qacmapping ... done")
answers = dmt(answers).map(lambda a: setprop(a, 'Comments', qacmapping[a['Id']]), prefix + "assign comments to answers").getresults()
questions = dmt(questions).map(lambda q: setprop(q, 'Comments', qacmapping[q['Id']]), prefix + "assign comments to questions").getresults()
# # create comment question comment answer mapping
# printnoln(prefix + "create qacmapping ...")
# qacmapping = {id: [] for id in questionids.union(answerids)}
# for c in comments:
# qacmapping[c['ParentId']].append(c)
# rprint(prefix + "create qacmapping ... done")
# answers = dmt(answers).map(lambda a: setprop(a, 'Comments', qacmapping[a['Id']]), prefix + "assign comments to answers").getresults()
# questions = dmt(questions).map(lambda q: setprop(q, 'Comments', qacmapping[q['Id']]), prefix + "assign comments to questions").getresults()
# safety check
countans = dmt(questions).map(lambda q: len(q['Answers']), prefix + "sum answer count") \