From 486614372df8972a04d8d077a194ec185ef0463a Mon Sep 17 00:00:00 2001
From: wea_ondara <wea_ondara@alpenblock.net>
Date: Fri, 3 Apr 2020 14:00:02 +0200
Subject: [PATCH] wip

---
 text/3_method.tex   | 22 ++++++++++++++++++++++
 text/4_datasets.tex | 22 ++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/text/3_method.tex b/text/3_method.tex
index 7171319..d80a535 100644
--- a/text/3_method.tex
+++ b/text/3_method.tex
@@ -2,10 +2,32 @@
 
 % sentiment calculation via vaderlib, write whole paragraph
 % data sets as xml from archive.org
+
+%cleaning data
+% broken entries, missing user id
 % answers in html -> strip html and remove code sections, not contribution to sentiment
+
+
 % calc sentiment for answers
 
 % about the change 
 % https://meta.stackexchange.com/questions/314287/come-take-a-look-at-our-new-contributor-indicator?cb=1
 % https://meta.stackexchange.com/questions/314472/what-are-the-exact-criteria-for-the-new-contributor-indicator-to-be-shown  ; change date = 2018-08-21T21:04:49.177
 % new user indicator visible for 1 week ...
+
+
+
+% differences in avg sentiment
+% look at plots and write something that fits
+
+
+
+%interrupted time series
+% ref tutorial paper
+% often used in medical fields to see if changes have an effect
+% used the same tensors as described in the paper, show formula and how it works, describe the 3 tensors and what they capture
+% explain why I chose this model: it captures the change; a more complex model would capture more but also get more complicated; these 3 tensors are enough to see the impact
+% fitting every value not aggregated values, aggregated values would have different weights, weights are too far spread, contrary to paper where person years are more or less constant
+% single value fitting is better, no weight issues, as weights are taken care of via more values
+% if one month has more values than another, then that month affects the fit more, as more values are present
+% 
diff --git a/text/4_datasets.tex b/text/4_datasets.tex
index 1de41d9..8abefb4 100644
--- a/text/4_datasets.tex
+++ b/text/4_datasets.tex
@@ -1,7 +1,29 @@
 \chapter{Datasets}
 
+%general
 % from archive.org
 % list of datasets
+% selected largest datasets; in smaller datasets the data is too sparse to draw conclusions, the statistical chance of outliers is too big, outliers would affect the outcome too much
+% larger datasets yield more consistent results
 
+
+%sections 1 per site
+\section{StackOverflow.com}
+\section{math.stackexchange.com}
+\section{MathOverflow.net}
+\section{AskUbuntu.com}
+\section{ServerFault.com}
+\section{SuperUser.com}
+\section{electronics.stackexchange.com}
+\section{stats.stackexchange.com}
+\section{tex.stackexchange.com}
+\section{unix.stackexchange.com}
+
+
+
+% general information
+% #user, #questions, #answers, #votes, #avg answer/question
+
+%plots
 % #users
 % #questions, #answers