From defef8383d8c2111310a7e58482919974b688ae4 Mon Sep 17 00:00:00 2001 From: wea_ondara Date: Tue, 3 Nov 2020 16:47:32 +0100 Subject: [PATCH] wip --- text/2_relwork.tex | 80 ++++++++++++++++++++++++++++++++++++++++++++++ text/bib.bib | 70 ++++++++++++++++++++++++++++++++++++++++ todo2 | 4 +-- 3 files changed, 152 insertions(+), 2 deletions(-) diff --git a/text/2_relwork.tex b/text/2_relwork.tex index b6e4e84..6717c4f 100644 --- a/text/2_relwork.tex +++ b/text/2_relwork.tex @@ -205,6 +205,86 @@ Quality also depends on the type of platform. \cite{lin2017better} showed that e % A comprehensive survey and classification of approaches for community question answering \cite{srba2016comprehensive}, meta study on papers published between 2005 and 2014 +\subsection{Analysis} + +%general blabla +% sentiment intensity (Valence based), lexical features + + + + +% sentiment analyse: es gibt 10-15 methoden, +% alle sentiment methoden + vader +\subsubsection{Sentiment analysis} + +%challenges (vader) +% - coverage (e.g. of lexical features, important in microblog texts) +% - sentiment intensity (some of the following tools ignore intensity completely (just -1 or 1)) +% - creating a human-validated gold standard lexicon is very time consuming/labor intensive, with sentiment valence scores, feature detection and context awareness, + +%%%%% handcrafted +%liwc (Linguistic Inquiry and Word Count) \cite{pennebaker2001linguistic,pennebakerdevelopment}, 2001 +% - misses acronyms, initialisms, emoticons, or slang, which are known to be important for sentiment analysis of social text (vader) +% - cannot recognise sentiment intensity (all words have an equal weight) (vader) +% - ca 4500 words (up to date?), ca 400 pos words, ca 500 neg words, lexicon proprietary (vader) +% ... +%General Inquirer (GI) \cite{stone1966general} 1966 +% - 11k words, 1900 pos, 2300 neg, all approx +% - very old (1966), continuously refined, still in use (vader) +% - misses lexical feature detection (acronyms, ...) 
and sentiment intensity (vader) +%Hu-Liu04 \cite{hu2004mining,liu2005opinion}, 2004 +% - 6800 words, 2000 pos, 4800 neg, all approx values (vader) +% - better suited for social media text, misses acronyms/initialisms (vader) +% - bootstrapped from WordNet (well-known English lexical database) (vader) +%Word-Sense Disambiguation (WSD) \cite{akkaya2009subjectivity}, 2009 +% - TODO +%wordnet \cite{miller1998wordnet} 1998 +% - TODO +%sentiwordnet \cite{baccianella2010sentiwordnet} +% - TODO +%ANEW (Affective Norms for English Words) \cite{bradley1999affective} +% - TODO +%SenticNet \cite{cambria2010senticnet} +% - TODO + +%%%%% automated (machine learning) +%often require large training sets, compared to creating a lexicon (vader) +%training data must represent as many features as possible, otherwise feature is not learned, often not the case (vader) +%very cpu and memory intensive, slow, compared to lexicon-based (vader) +%derived features not nachvollziehbar as a human (black-box) (vader) +%generalization problem (vader) +%updating (extend/modify) hard (e.g. new domain) (vader) +% naive bayes +% - simple (vader) +% - assumption: feature probabilities are independent of each other (vader) +% Maximum Entropy +% - exponential model + logistic regression (vader) +% - feature weighting through not assuming independence as in naive bayes (vader) +%svm +%- mathematical anspruchsvoll (vader) +%- separate datapoints using hyperplanes (vader) +%- long training period (other methods do not need training at all because lexica) (vader) + + +%vader (Valence Aware Dictionary for sEntiment Reasoning)(grob) \cite{hutto2014vader} +% - 2014 +% - detects acronyms, ... 
+% - sentiment intensity +% - not just 1 and -1 for pos and neg but value in a range +% - context awareness +% - disambiguation of words if they have multiple meanings (contextual meaning) + +%general +%depends on sentiment lexicons, more info in vader 2.1 Sentiment Lexicons +%vader not binary (pos, neg) but 3 categories + + + + +% its +% ursprüngliches paper ITS, wie hat man das früher (davor) gemacht +\subsubsection{Trend analysis} + diff --git a/text/bib.bib b/text/bib.bib index 87132db..ad509fa 100644 --- a/text/bib.bib +++ b/text/bib.bib @@ -286,3 +286,73 @@ pages={1341--1352}, year={2013} } +@article{pennebaker2001linguistic, + title={Linguistic inquiry and word count: LIWC 2001}, + author={Pennebaker, James W and Francis, Martha E and Booth, Roger J}, + journal={Mahwah: Lawrence Erlbaum Associates}, + volume={71}, + number={2001}, + pages={2001}, + year={2001} +} +@article{pennebakerdevelopment, + title={The Development and Psychometric Properties of LIWC2007}, + author={Pennebaker, James W and Chung, Cindy K and Ireland, Molly and Gonzales, Amy and Booth, Roger J} +} +@article{stone1966general, + title={The general inquirer: A computer approach to content analysis.}, + author={Stone, Philip J and Dunphy, Dexter C and Smith, Marshall S}, + year={1966}, + publisher={MIT press} +} +@inproceedings{liu2005opinion, + title={Opinion observer: analyzing and comparing opinions on the web}, + author={Liu, Bing and Hu, Minqing and Cheng, Junsheng}, + booktitle={Proceedings of the 14th international conference on World Wide Web}, + pages={342--351}, + year={2005} +} +@inproceedings{hu2004mining, + title={Mining and summarizing customer reviews}, + author={Hu, Minqing and Liu, Bing}, + booktitle={Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining}, + pages={168--177}, + year={2004} +} +@inproceedings{akkaya2009subjectivity, + title={Subjectivity word sense disambiguation}, + author={Akkaya, Cem and Wiebe, Janyce and Mihalcea, 
Rada}, + booktitle={Proceedings of the 2009 conference on empirical methods in natural language processing}, + pages={190--199}, + year={2009} +} +@book{miller1998wordnet, + title={WordNet: An electronic lexical database}, + author={Miller, George A}, + year={1998}, + publisher={MIT press} +} +@inproceedings{baccianella2010sentiwordnet, + title={Sentiwordnet 3.0: an enhanced lexical resource for sentiment analysis and opinion mining.}, + author={Baccianella, Stefano and Esuli, Andrea and Sebastiani, Fabrizio}, + booktitle={Lrec}, + volume={10}, + number={2010}, + pages={2200--2204}, + year={2010} +} +@inproceedings{cambria2010senticnet, + title={Senticnet: A publicly available semantic resource for opinion mining.}, + author={Cambria, Erik and Speer, Robert and Havasi, Catherine and Hussain, Amir}, + booktitle={AAAI fall symposium: commonsense knowledge}, + volume={10}, + number={0}, + year={2010}, + organization={Citeseer} +} +@techreport{bradley1999affective, + title={Affective norms for English words (ANEW): Instruction manual and affective ratings}, + author={Bradley, Margaret M and Lang, Peter J}, + year={1999}, + institution={Technical report C-1, the center for research in psychophysiology~…} +} diff --git a/todo2 b/todo2 index 40fc617..bcd107c 100644 --- a/todo2 +++ b/todo2 @@ -3,7 +3,7 @@ 2. - related work ausbauen - mehr referencen -- ursprunges papaer ITS, wi hat man das fruüher gemacht +- ursprüngliches paper ITS, wie hat man das früher (davor) gemacht - onboarding passt, community growth und sustanibiltiy machen - sentiment analyse: es gibt 10-15 methoden, @@ -11,7 +11,7 @@ 3. - argumente warum ich genau diese variablen (sentiment, votes, #questions) -- limitierungen, andere factoren +- limitierungen, andere faktoren - vader genau beschreiben 5.