From 486614372df8972a04d8d077a194ec185ef0463a Mon Sep 17 00:00:00 2001 From: wea_ondara Date: Fri, 3 Apr 2020 14:00:02 +0200 Subject: [PATCH] wip --- text/3_method.tex | 22 ++++++++++++++++++++++ text/4_datasets.tex | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/text/3_method.tex b/text/3_method.tex index 7171319..d80a535 100644 --- a/text/3_method.tex +++ b/text/3_method.tex @@ -2,10 +2,32 @@ % sentiment calculation via vaderlib, write whole paragraph % data sets as xml from archive.org + +%cleaning data +% broken entries, missing user id % answers in html -> strip html and remove code sections, not contribution to sentiment + + % calc sentiment for answers % about the change % https://meta.stackexchange.com/questions/314287/come-take-a-look-at-our-new-contributor-indicator?cb=1 % https://meta.stackexchange.com/questions/314472/what-are-the-exact-criteria-for-the-new-contributor-indicator-to-be-shown ; change date = 2018-08-21T21:04:49.177 % new user indicator visible for 1 week ... 
+ + + +% differences in avg sentiment +% look at plots and write something that fits + + + +%interrupted time series +% ref tutorial paper +% often used in medical fields to see if changes have an effect +% used same tensors as described in the paper, show formula and how it works, 3 tensors, describe tensors and what they capture +% explain why I chose this model, captures the change, more complex model would capture more but also get more complicated, these 3 tensors are enough to see the impact +% fitting every value not aggregated values, aggregated values would have different weights, weights are too far spread, contrary to paper where person years are more or less constant +% single value fitting is better, no weight issues, as weights are taken care of via more values +% if one month has more values than another then that month affects the fit more as more values are present +% diff --git a/text/4_datasets.tex b/text/4_datasets.tex index 1de41d9..8abefb4 100644 --- a/text/4_datasets.tex +++ b/text/4_datasets.tex @@ -1,7 +1,29 @@ \chapter{Datasets} +%general % from archive.org % list of datasets +% selected largest dataset, smaller datasets' data too sparse to draw conclusions, statistical chance of outliers too big, outliers would affect the outcome by too much +% larger data sets yield more consistent results + +%sections 1 per site +\section{StackOverflow.com} +\section{math.stackexchange.com} +\section{MathOverflow.com} +\section{AskUbuntu.com} +\section{ServerFault.com} +\section{SuperUser.com} +\section{electronics.stackexchange.com} +\section{stats.stackexchange.com} +\section{tex.stackexchange.com} +\section{unix.stackexchange.com} + + + +% general information +% #user, #questions, #answers, #votes, #avg answer/question + +%plots % #users % #questions, #answers