This commit is contained in:
wea_ondara
2019-07-16 11:51:40 +02:00
parent a557cbd5b9
commit bca211551c
4 changed files with 192 additions and 61 deletions

View File

@@ -8,6 +8,7 @@ import matplotlib.pyplot as plt
from collections import defaultdict from collections import defaultdict
from loader import load, dmt, cms from loader import load, dmt, cms
import math import math
from common import calc_intervals
printnoln = lambda text: print(text, end='', flush=True) printnoln = lambda text: print(text, end='', flush=True)
rprint = lambda text: print('\r' + text) rprint = lambda text: print('\r' + text)
@@ -23,6 +24,7 @@ def main(folder):
users, posts, firstcontrib, sumcontrib = load(folder) users, posts, firstcontrib, sumcontrib = load(folder)
intervals = calc_intervals(posts) intervals = calc_intervals(posts)
cachedsentiments = {}
postcounts = range(1, 5 + 1) postcounts = range(1, 5 + 1)
for (option_date_from, option_date_to) in intervals: for (option_date_from, option_date_to) in intervals:
@@ -79,7 +81,11 @@ def main(folder):
printnoln("\rcomputing toxic levels: post #" + str(i + 1) + "/" + str(len(filteredposts))) printnoln("\rcomputing toxic levels: post #" + str(i + 1) + "/" + str(len(filteredposts)))
userid = post['OwnerUserId'] userid = post['OwnerUserId']
for a in post['Answers']: for a in post['Answers']:
toxlevel = computeToxLevel(a['Body']) if a['Id'] in cachedsentiments.keys():
toxlevel = cachedsentiments[a['Id']]
else:
toxlevel = computeToxLevel(a['Body'])
cachedsentiments[a['Id']] = toxlevel
toxlevels[userid].append(toxlevel) toxlevels[userid].append(toxlevel)
rprint("computing toxic levels: post #" + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... took " + str(cms() - start) + "ms") rprint("computing toxic levels: post #" + str(len(filteredposts)) + "/" + str(len(filteredposts)) + " ... took " + str(cms() - start) + "ms")
@@ -99,23 +105,17 @@ def main(folder):
fig, axs = plt.subplots(2, 2, figsize=(16, 12)) fig, axs = plt.subplots(2, 2, figsize=(16, 12))
axs[0, 0].set_title('Neg') axs[0, 0].set_title('Neg')
axs[0, 0].hist(neglevelsflat, np.linspace(-1, 1, 2 * 100))
axs[1, 0].set_title('Neu') axs[1, 0].set_title('Neu')
axs[1, 0].hist(neulevelsflat, np.linspace(-1, 1, 2 * 100))
axs[0, 1].set_title('Pos') axs[0, 1].set_title('Pos')
axs[0, 1].hist(poslevelsflat, np.linspace(-1, 1, 2 * 100))
axs[1, 1].set_title('Compound') axs[1, 1].set_title('Compound')
axs[0, 0].hist(neglevelsflat, np.linspace(0, 1, 1 * 100))
axs[1, 0].hist(neulevelsflat, np.linspace(0, 1, 1 * 100))
axs[0, 1].hist(poslevelsflat, np.linspace(0, 1, 1 * 100))
axs[1, 1].hist(comlevelsflat, np.linspace(-1, 1, 2 * 100)) axs[1, 1].hist(comlevelsflat, np.linspace(-1, 1, 2 * 100))
axs[0, 0].set_yscale('log')
# global axs[1, 0].set_yscale('log')
# gaxs[0, 0].hist(neglevelsflat, np.linspace(-1, 1, 2 * 100), label=str(option_posts) + " posts") axs[0, 1].set_yscale('log')
# gaxs[1, 0].hist(neulevelsflat, np.linspace(-1, 1, 2 * 100), label=str(option_posts) + " posts") axs[1, 1].set_yscale('log')
# gaxs[0, 1].hist(poslevelsflat, np.linspace(-1, 1, 2 * 100), label=str(option_posts) + " posts")
# gaxs[1, 1].hist(comlevelsflat, np.linspace(-1, 1, 2 * 100), label=str(option_posts) + " posts")
# gaxs[0, 0].hist(neglevelsflat, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), label=str(option_posts) + " posts")
# gaxs[1, 0].hist(neulevelsflat, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), label=str(option_posts) + " posts")
# gaxs[0, 1].hist(poslevelsflat, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), label=str(option_posts) + " posts")
# gaxs[1, 1].hist(comlevelsflat, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), label=str(option_posts) + " posts")
# plt.show() # plt.show()
fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts\nUsers registered between " fig.suptitle("Sentiment of answers to the first " + str(option_posts) + " (max) posts\nUsers registered between "
@@ -124,22 +124,18 @@ def main(folder):
plt.close(fig) plt.close(fig)
# global # global
gaxs[0, 0].hist(gneg, np.linspace(-1, 1, 2 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts]) gaxs[0, 0].hist(gneg, np.linspace(0, 1, 1 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
gaxs[1, 0].hist(gneu, np.linspace(-1, 1, 2 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts]) gaxs[1, 0].hist(gneu, np.linspace(0, 1, 1 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
gaxs[0, 1].hist(gpos, np.linspace(-1, 1, 2 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts]) gaxs[0, 1].hist(gpos, np.linspace(0, 1, 1 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
gaxs[1, 1].hist(gcom, np.linspace(-1, 1, 2 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts]) gaxs[1, 1].hist(gcom, np.linspace(-1, 1, 2 * 100), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[0, 0].hist(gneg, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[1, 0].hist(gneu, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[0, 1].hist(gpos, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[1, 1].hist(gcom, np.linspace(-1, 1, 2 * 100), alpha=1. / len(postcounts), color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[0, 0].hist(gneg, np.linspace(-1, 1, 2 * 100), stacked=True, color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[1, 0].hist(gneu, np.linspace(-1, 1, 2 * 100), stacked=True, color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[0, 1].hist(gpos, np.linspace(-1, 1, 2 * 100), stacked=True, color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
# gaxs[1, 1].hist(gcom, np.linspace(-1, 1, 2 * 100), stacked=True, color=colors[:len(postcounts)], label=[str(option_posts) + " posts" for option_posts in postcounts])
gaxs[0, 0].legend(loc="upper right") gaxs[0, 0].legend(loc="upper right")
gaxs[1, 0].legend(loc="upper right") gaxs[1, 0].legend(loc="upper right")
gaxs[0, 1].legend(loc="upper right") gaxs[0, 1].legend(loc="upper right")
gaxs[1, 1].legend(loc="upper right") gaxs[1, 1].legend(loc="upper right")
gaxs[0, 0].set_yscale('log')
gaxs[1, 0].set_yscale('log')
gaxs[0, 1].set_yscale('log')
gaxs[1, 1].set_yscale('log')
gfig.suptitle("Sentiment of answers to the first X (max) posts\nUsers registered between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y")) gfig.suptitle("Sentiment of answers to the first X (max) posts\nUsers registered between " + option_date_from.strftime("%d-%m-%Y") + " to " + option_date_to.strftime("%d-%m-%Y"))
gfig.savefig(goutfilename + ".png", bbox_inches='tight') gfig.savefig(goutfilename + ".png", bbox_inches='tight')
plt.close(gfig) plt.close(gfig)
@@ -159,34 +155,6 @@ def dumptoxlevels(lvls, filename):
file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n") file.write("toxlevels = " + str(lvls).replace("<class 'list'>", "list", 1) + "\n")
def calc_intervals(posts):
firstpost = dmt(posts).reduce(lambda acc, e: acc if acc < e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc < e else e, lambda: posts[0]['CreationDate'], "firstpost").getresults()
lastpost = dmt(posts).reduce(lambda acc, e: acc if acc > e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc > e else e, lambda: posts[0]['CreationDate'], "lastpost").getresults()
# calc quarter beginning
firstpost = firstpost.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
if firstpost.month not in (1, 4, 7, 10):
firstpost = firstpost.replace(month={1: 1, 2: 1, 3: 1, 4: 4, 5: 4, 6: 4, 7: 7, 8: 7, 9: 7, 10: 10, 11: 10, 12: 10}[firstpost.month])
lastpost = lastpost.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
if lastpost.month not in (1, 4, 7, 10):
lastpost = lastpost.replace(month={1: 1, 2: 1, 3: 1, 4: 4, 5: 4, 6: 4, 7: 7, 8: 7, 9: 7, 10: 10, 11: 10, 12: 10}[lastpost.month])
# add 3 months to last post
if lastpost.month == 10:
lastpost = lastpost.replace(month=1, year=lastpost.year + 1)
else:
lastpost = lastpost.replace(month=lastpost.month + 3)
cdate = firstpost
intervals = []
while cdate < lastpost:
nextquarter = cdate.replace(month=(cdate.month + 3) % 12, year=cdate.year + (0 if cdate.month + 3 < 12 else 1))
print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
intervals.append((cdate, nextquarter))
cdate = nextquarter
# sys.exit(0)
return intervals
if __name__ == "__main__": if __name__ == "__main__":
# execute only if run as a script # execute only if run as a script
usage = sys.argv[0] + " <folder>" usage = sys.argv[0] + " <folder>"

View File

@@ -7,6 +7,9 @@ from scipy.stats import ks_2samp
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np
colors = {'neg': 'red', 'neu': 'green', 'pos': 'blue', 'com': 'orange'}
def main(folder): def main(folder):
@@ -70,19 +73,40 @@ def plotbypost(onlyfiles):
f2 = l[i + 1] f2 = l[i + 1]
f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[p][i]) + "; ks neu = " + str(changes_neu[p][i]) f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[p][i]) + "; ks neu = " + str(changes_neu[p][i])
+ "; ks pos = " + str(changes_pos[p][i]) + "; ks com = " + str(changes_com[p][i]) + "\n") + "; ks pos = " + str(changes_pos[p][i]) + "; ks com = " + str(changes_com[p][i]) + "\n")
# pval
for (p, l) in files.items():
x = [l[i].split("_")[2] + " -\n" + l[i + 1].split("_")[2] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12))
for type, changes in {"neg": changes_neg[p], "neu": changes_neu[p], "pos": changes_pos[p], "com": changes_com[p]}.items():
pval = [x.pvalue for x in changes]
plt.plot(x, pval, label=type + ".pval", color=colors[type])
mean = np.mean(pval)
std = np.std(pval)
dev = [(xx, s) for (xx, s) in zip(x, pval) if s <= mean - std or s >= mean + std]
plt.plot(x, [mean] * len(pval), color=colors[type], ls='dashed')
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with max " + str(p) + " posts")
plt.xticks(rotation=90)
plt.legend(loc="upper right")
plt.savefig(folder + "/ks_pval_" + str(p) + ".png", bbox_inches='tight')
plt.close(fig)
# stat
for (p, l) in files.items(): for (p, l) in files.items():
x = [l[i].split("_")[2] + " -\n" + l[i + 1].split("_")[2] for i in range(len(l) - 1)] x = [l[i].split("_")[2] + " -\n" + l[i + 1].split("_")[2] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12)) fig = plt.figure(figsize=(16, 12))
for type, changes in {"neg": changes_neg[p], "neu": changes_neu[p], "pos": changes_pos[p], "com": changes_com[p]}.items(): for type, changes in {"neg": changes_neg[p], "neu": changes_neu[p], "pos": changes_pos[p], "com": changes_com[p]}.items():
stat = [x.statistic for x in changes] stat = [x.statistic for x in changes]
pval = [x.pvalue for x in changes] plt.plot(x, stat, label=type + ".stat", color=colors[type])
plt.plot(x, stat, label=type + ".stat") mean = np.mean(stat)
plt.plot(x, pval, label=type + ".pval") std = np.std(stat)
dev = [(xx, s) for (xx, s) in zip(x, stat) if s <= mean - std or s >= mean + std]
plt.plot(x, [mean] * len(stat), color=colors[type], ls='dashed')
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with max " + str(p) + " posts") plt.title("KS 2-sided test with max " + str(p) + " posts")
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.legend(loc="upper right") plt.legend(loc="upper right")
plt.savefig(folder + "/ks_" + str(p) + ".png", bbox_inches='tight') plt.savefig(folder + "/ks_stat_" + str(p) + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)
@@ -134,18 +158,40 @@ def plotbydate(onlyfiles):
f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[d][i]) + "; ks neu = " + str(changes_neu[d][i]) f.write(f1 + " -> " + f2 + ": ks neg = " + str(changes_neg[d][i]) + "; ks neu = " + str(changes_neu[d][i])
+ "; ks pos = " + str(changes_pos[d][i]) + "; ks com = " + str(changes_com[d][i]) + "\n") + "; ks pos = " + str(changes_pos[d][i]) + "; ks com = " + str(changes_com[d][i]) + "\n")
# pval
for (d, l) in files.items():
x = [l[i].split("_")[4][:-3] + "-" + l[i + 1].split("_")[4][:-3] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12))
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
pval = [x.pvalue for x in changes]
plt.plot(x, pval, label=type + ".pval", color=colors[type])
mean = np.mean(pval)
std = np.std(pval)
dev = [(xx, s) for (xx, s) in zip(x, pval) if s <= mean - std or s >= mean + std]
plt.plot(x, [mean] * len(pval), color=colors[type], ls='dashed')
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
plt.xticks(rotation=90)
plt.legend(loc="upper right")
plt.savefig(folder + "/ks_pval_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.close(fig)
# stat
for (d, l) in files.items(): for (d, l) in files.items():
x = [l[i].split("_")[4][:-3] + "-" + l[i + 1].split("_")[4][:-3] for i in range(len(l) - 1)] x = [l[i].split("_")[4][:-3] + "-" + l[i + 1].split("_")[4][:-3] for i in range(len(l) - 1)]
fig = plt.figure(figsize=(16, 12)) fig = plt.figure(figsize=(16, 12))
for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items(): for type, changes in {"neg": changes_neg[d], "neu": changes_neu[d], "pos": changes_pos[d], "com": changes_com[d]}.items():
stat = [x.statistic for x in changes] stat = [x.statistic for x in changes]
pval = [x.pvalue for x in changes] plt.plot(x, stat, label=type + ".stat", color=colors[type])
plt.plot(x, stat, label=type + ".stat") mean = np.mean(stat)
plt.plot(x, pval, label=type + ".pval") std = np.std(stat)
dev = [(xx, s) for (xx, s) in zip(x, stat) if s <= mean - std or s >= mean + std]
plt.plot(x, [mean] * len(stat), color=colors[type], ls='dashed')
plt.plot([xx for (xx, s) in dev], [s for (xx, s) in dev], color=colors[type], ls='None', marker='o')
plt.title("KS 2-sided test with between " + d[0] + " and " + d[1]) plt.title("KS 2-sided test with between " + d[0] + " and " + d[1])
plt.xticks(rotation=90) plt.xticks(rotation=90)
plt.legend(loc="upper right") plt.legend(loc="upper right")
plt.savefig(folder + "/ks_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight') plt.savefig(folder + "/ks_stat_" + d[0] + "_" + d[1] + ".png", bbox_inches='tight')
plt.close(fig) plt.close(fig)

29
common.py Normal file
View File

@@ -0,0 +1,29 @@
from loader import load, dmt, cms
def _quarter_start(moment):
    """Return midnight on the first day of the calendar quarter containing *moment*."""
    # Months 1-3 -> 1, 4-6 -> 4, 7-9 -> 7, 10-12 -> 10.
    first_month = moment.month - (moment.month - 1) % 3
    return moment.replace(month=first_month, day=1, hour=0, minute=0, second=0, microsecond=0)


def _next_quarter(quarter_begin):
    """Return *quarter_begin* shifted forward by three months, rolling over into the next year."""
    # Work with a zero-based month index so that the December -> January
    # rollover never produces the invalid month 0.
    month_index = quarter_begin.month - 1 + 3
    return quarter_begin.replace(year=quarter_begin.year + month_index // 12, month=month_index % 12 + 1)


def calc_intervals(posts):
    """Split the time span covered by *posts* into consecutive calendar quarters.

    The earliest and latest ``'CreationDate'`` values are looked up, both are
    snapped back to the start of their quarter, and one interval is produced
    for every quarter from the first one up to and including the quarter of
    the latest post. Each interval that is added is also printed to stdout.

    :param posts: non-empty sequence of mappings with a ``'CreationDate'`` entry
        (``posts[0]`` seeds the reductions); the values must support
        ``replace(...)`` and ``strftime(...)`` the way ``datetime`` objects do.
    :return: list of ``(quarter_start, next_quarter_start)`` tuples in
        chronological order.
    """
    # NOTE(review): the dmt(...).reduce(...) arguments look like (fold element into accumulator,
    # combine two partial results, initial value factory, label) -- confirm against loader.dmt.
    firstpost = dmt(posts).reduce(lambda acc, e: acc if acc < e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc < e else e, lambda: posts[0]['CreationDate'], "firstpost").getresults()
    lastpost = dmt(posts).reduce(lambda acc, e: acc if acc > e['CreationDate'] else e['CreationDate'], lambda acc, e: acc if acc > e else e, lambda: posts[0]['CreationDate'], "lastpost").getresults()

    cdate = _quarter_start(firstpost)
    # The end bound is the start of the quarter *after* the last post, so the
    # quarter containing the last post is still emitted by the loop below.
    end = _next_quarter(_quarter_start(lastpost))

    intervals = []
    while cdate < end:
        nextquarter = _next_quarter(cdate)
        print("adding interval: " + cdate.strftime("%d-%m-%Y") + " - " + nextquarter.strftime("%d-%m-%Y"))
        intervals.append((cdate, nextquarter))
        cdate = nextquarter
    return intervals

88
posthist.py Normal file
View File

@@ -0,0 +1,88 @@
from datetime import datetime
from datetime import timedelta
import sys
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from loader import load, dmt, cms
import math
from common import calc_intervals
def printnoln(text):
    """Print *text* without a trailing newline and flush the output right away."""
    print(text, end='', flush=True)


def rprint(text):
    """Print *text* preceded by a carriage return, followed by the usual newline."""
    print('\r' + text)
# NOTE(review): DAYS_NEW_USER and OLD_USER_YEAR are not referenced anywhere in the
# visible part of this script; presumably thresholds shared with sibling scripts -- confirm or remove.
DAYS_NEW_USER = 7
OLD_USER_YEAR = 3
# Single module-level VADER analyser instance; computeToxLevel() calls polarity_scores() on it.
analyser = SentimentIntensityAnalyzer()
# Color names for plotting; main() in this script does not reference this list.
colors = ['red', 'green', 'blue', 'orange', 'deeppink']
def main(folder):
    """Plot, for every registration quarter, a histogram of posts per newly registered user.

    Loads the data set from *folder*, splits its time span into quarters with
    ``calc_intervals`` and, for each quarter, selects the users whose
    ``'CreationDate'`` falls into ``[quarter start, next quarter start)``,
    counts how many posts each of them owns and saves a log-scaled histogram of
    those counts to ``<folder>/output/posthist_<name>_<from>_<to>.png``.

    :param folder: path of the data set directory; its last ``/``-separated
        component is used as ``<name>`` in the output file names.
    """
    users, posts, firstcontrib, sumcontrib = load(folder)

    intervals = calc_intervals(posts)

    # Create the output directory once and without a shell, so folder names
    # containing spaces or shell metacharacters are handled correctly.
    outdir = folder + "/output"
    os.makedirs(outdir, exist_ok=True)
    dataset = folder.split("/")[-1]

    for (option_date_from, option_date_to) in intervals:
        date_from = option_date_from.strftime("%d-%m-%Y")
        date_to = option_date_to.strftime("%d-%m-%Y")
        print((date_from, date_to))

        # users registered in option_date_from <= creation date < option_date_to, and all of their posts
        newusers = set(dmt(users).filter(lambda u: option_date_from <= u['CreationDate'] < option_date_to, "filtering users by creation").map(lambda u: u['Id'], "getting user ids").getresults())
        newposts = dmt(posts).filter(lambda p: p['OwnerUserId'] in newusers, "filtering posts by users").getresults()

        # number of posts per user id
        postcounts = defaultdict(int)
        for p in newposts:
            postcounts[p['OwnerUserId']] += 1

        histfilename = outdir + "/posthist_" + dataset + "_" + date_from + "_" + date_to

        histdata = list(postcounts.values())

        fig = plt.figure(figsize=(16, 12))
        # Bin edges 0, 1, ..., max + 1 give every post count its own bin. The
        # last histogram bin is closed on both sides, so stopping the edges at
        # max would merge the two highest counts into one bar; it would also
        # leave a single edge (no bin at all) when there is no data.
        plt.hist(histdata, range(max(histdata, default=0) + 2))
        plt.yscale('log')
        # NOTE(review): a lower limit of 0 cannot be represented on a log axis -- confirm this call has the intended effect.
        plt.ylim(bottom=0)
        plt.title("Histogram for user post count registered between " + date_from + " and " + date_to)
        fig.savefig(histfilename + ".png", bbox_inches='tight')
        plt.close(fig)
def computeToxLevel(text):
    """Return the result of the module-level analyser's ``polarity_scores`` for *text*."""
    scores = analyser.polarity_scores(text)
    return scores
def flatmap(arr):
    """Flatten one level of nesting: return a new list holding the items of every element of *arr*, in order."""
    flattened = []
    for inner in arr:
        flattened.extend(inner)
    return flattened
def dumptoxlevels(lvls, filename):
    """Write *lvls* to *filename* as Python source that assigns it to ``toxlevels``.

    The file starts with a ``defaultdict`` import, and the first
    ``<class 'list'>`` in the textual form of *lvls* is rewritten to ``list``
    (this is what the repr of a ``defaultdict(list)`` contains).
    """
    with open(filename, "w") as out:
        header = "from collections import defaultdict\n\n"
        out.write(header)
        rendered = str(lvls).replace("<class 'list'>", "list", 1)
        out.write("toxlevels = " + rendered + "\n")
if __name__ == "__main__":
    # Script entry point: the first command line argument is the data folder.
    usage = sys.argv[0] + " <folder>"
    if len(sys.argv) <= 1:
        # no folder given
        print(usage)
        sys.exit(1)
    folder = sys.argv[1]
    if os.path.isdir(folder):
        main(folder)
    else:
        print(folder + " is not a folder")
        sys.exit(1)