import os import sys from collections import defaultdict import matplotlib.pyplot as plt from matplotlib.ticker import MaxNLocator from common import calc_intervals, IMAGE_MAGICK from loader import load, dmt colors = ['red', 'green', 'blue', 'orange', 'deeppink'] def main(folder, intervl): users, posts, firstcontrib, sumcontrib = load(folder) intervals = calc_intervals(posts, intervl) outputdir = folder + "/output/posthist/" os.system("mkdir -p " + outputdir) activeusercounts = [] imgmagickcmd = IMAGE_MAGICK for (option_date_from, option_date_to) in intervals: print((option_date_from.strftime("%d-%m-%Y"), option_date_to.strftime("%d-%m-%Y"))) # filter posts by option_date_from <= creation date <= option_date_to newposts = dmt(posts).filter(lambda p: option_date_from <= p['CreationDate'] < option_date_to, "filtering posts by date").getresults() postcounts = defaultdict(list) i = 0 for p in newposts: postcounts[p['OwnerUserId']].append(p) i = i + 1 postcounts = {id: len(pc) for (id, pc) in postcounts.items()} activeusercounts.append(((option_date_from, option_date_to), len(postcounts.keys()))) histfilename = outputdir + "posthist_" + folder.split("/")[-1] + "_" + option_date_from.strftime("%d-%m-%Y") + "_" + option_date_to.strftime("%d-%m-%Y") histdata = [pc for pc in postcounts.values()] fig = plt.figure(figsize=(16, 12)) plt.hist(histdata, range(max(histdata, default=0) + 1)) plt.yscale('log') plt.ylim(bottom=0) plt.xlabel("#posts") plt.ylabel("#users with X posts") fig.gca().xaxis.set_major_locator(MaxNLocator(integer=True)) plt.title("Histogram for user post count registered between " + option_date_from.strftime("%d-%m-%Y") + " and " + option_date_to.strftime("%d-%m-%Y")) fig.savefig(histfilename + ".png", bbox_inches='tight') plt.close(fig) imgmagickcmd += " " + histfilename + ".png" os.system(imgmagickcmd + " " + outputdir + "/posthist.pdf") fig = plt.figure(figsize=(16, 12)) plt.plot([x[0] for (x, y) in activeusercounts], [y for (x, y) in activeusercounts]) plt.yscale('log') plt.ylim(bottom=0) plt.title("Active users") fig.savefig(outputdir + "activeusers.png", bbox_inches='tight') plt.close(fig) if __name__ == "__main__": # execute only if run as a script usage = sys.argv[0] + " " if len(sys.argv) < 2: print(usage) sys.exit(1) folder = sys.argv[1] if not os.path.isdir(folder): print(folder + " is not a folder") sys.exit(1) interval = 3 if len(sys.argv) >= 3: if sys.argv[2].startswith("-i"): interval = sys.argv[2][2:] try: interval = int(interval) except ValueError: print("-i: int required") sys.exit(1) if interval < 1 or interval > 12: print("-i: only 1 - 12") sys.exit(1) else: print("unknown parameter: " + sys.argv[2]) sys.exit(1) main(folder, interval)