wip
This commit is contained in:
@@ -205,6 +205,86 @@ Quality also depends on the type of platform. \cite{lin2017better} showed that e
|
||||
% A comprehensive survey and classification of approaches for community question answering \cite{srba2016comprehensive}, meta study on papers published between 2005 and 2014
|
||||
|
||||
|
||||
\subsection{Analysis}
|
||||
|
||||
%general blabla
|
||||
% sentiment intensity (Valence based), lexical features
|
||||
|
||||
|
||||
|
||||
|
||||
% sentiment analysis: there are 10-15 methods,
|
||||
% all sentiment methods + vader
|
||||
\subsubsection{Sentiment analysis}
|
||||
|
||||
%challenges (vader)
|
||||
% - coverage (e.g. of lexical features, important in microblog texts)
|
||||
% - sentiment intensity (some of the following tools ignore intensity completely and only output -1 or 1)
|
||||
% - creating a human-validated gold standard lexicon is very time consuming/labor intensive, with sentiment valence scores, feature detection and context awareness,
|
||||
|
||||
%%%%% handcrafted
|
||||
%liwc (Linguistic Inquiry and Word Count) \cite{pennebaker2001linguistic,pennebakerdevelopment}, 2001
|
||||
% - does not cover acronyms, initialisms, emoticons, or slang, which are known to be important for sentiment analysis of social text (vader)
|
||||
% - cannot recognise sentiment intensity (all words have an equal weight) (vader)
|
||||
% - ca 4500 words (uptodate?), ca 400 pos words, ca 500 neg words, lexicon proprietary (vader)
|
||||
% ...
|
||||
%General Inquirer (GI) \cite{stone1966general} 1966
|
||||
% - 11k words, 1900 pos, 2300 neg, all approx
|
||||
% - very old (1966), continuously refined, still in use (vader)
|
||||
% - misses lexical feature detection (acronyms, ...) and sentiment intensity (vader)
|
||||
%Hu-Liu04 \cite{hu2004mining,liu2005opinion}, 2004
|
||||
% - 6800 words, 2000 pos, 4800 neg, all approx values (vader)
|
||||
% - better suited for social media text, misses acronyms/initialisms (vader)
|
||||
% - bootstrapped from WordNet (well-known English lexical database) (vader)
|
||||
%Word-Sense Disambiguation (WSD) \cite{akkaya2009subjectivity}, 2009
|
||||
% - TODO
|
||||
%wordnet \cite{miller1998wordnet} 1998
|
||||
% - TODO
|
||||
%sentiwordnet \cite{baccianella2010sentiwordnet}
|
||||
% - TODO
|
||||
%ANEW (Affective Norms for English Words) \cite{bradley1999affective}
|
||||
% - TODO
|
||||
%SenticNet \cite{cambria2010senticnet}
|
||||
% - TODO
|
||||
|
||||
%%%%% automated (machine learning)
|
||||
%often require large training sets, compare to creating a lexicon (vader)
|
||||
%training data must represent as many features as possible, otherwise feature is not learned, often not the case (vader)
|
||||
%very cpu and memory intensive, slow, compare to lexicon-based (vader)
|
||||
%derived features are not comprehensible to a human (black box) (vader)
|
||||
%generalization problem (vader)
|
||||
%updating (extending/modifying) is hard (e.g. for a new domain) (vader)
|
||||
% naive bayes
|
||||
% - simple (vader)
|
||||
% - assumption: feature probabilities are independent of each other (vader)
|
||||
% Maximum Entropy
|
||||
% - exponential model + logistic regression (vader)
|
||||
% - feature weighting through not assuming independence as in naive bayes (vader)
|
||||
%svm
|
||||
%- mathematically demanding (vader)
|
||||
%- separates data points using hyperplanes (vader)
|
||||
%- long training period (other methods do not need training at all because lexica) (vader)
|
||||
|
||||
|
||||
%vader (Valence Aware Dictionary for sEntiment Reasoning)(grob) \cite{hutto2014vader}
|
||||
% - 2014
|
||||
% - detects acronyms, ...
|
||||
% - sentiment intensity
|
||||
% - not just 1 and -1 for pos and neg but value in a range
|
||||
% - context awareness
|
||||
% - disambiguation of words if they have multiple meanings (contextual meaning)
|
||||
|
||||
%general
|
||||
%dep on sentiment lexicons, more info in vader 2.1 Sentiment Lexicons
|
||||
%vader not binary (pos, neg) but 3 categories
|
||||
|
||||
|
||||
|
||||
|
||||
% its
|
||||
% original ITS paper; how was this done previously (before ITS)
|
||||
\subsubsection{Trend analysis}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
70
text/bib.bib
70
text/bib.bib
@@ -286,3 +286,73 @@
|
||||
pages={1341--1352},
|
||||
year={2013}
|
||||
}
|
||||
% LIWC 2001 program manual. It was published as a book, so the publisher and
% city live in their own fields; the former journal/volume/number/pages values
% were citation-export placeholders rather than bibliographic data.
@book{pennebaker2001linguistic,
  author    = {Pennebaker, James W and Francis, Martha E and Booth, Roger J},
  title     = {Linguistic Inquiry and Word Count: {LIWC} 2001},
  publisher = {Lawrence Erlbaum Associates},
  address   = {Mahwah, NJ},
  year      = {2001},
}
|
||||
% LIWC2007 manual. It has no journal, so it is typed as a manual; the year is
% the one given in the title.
% NOTE(review): organization/address follow the commonly cited imprint
% (LIWC.net, Austin, TX) -- confirm against the copy actually used.
@manual{pennebakerdevelopment,
  author       = {Pennebaker, James W and Chung, Cindy K and Ireland, Molly and Gonzales, Amy and Booth, Roger J},
  title        = {The Development and Psychometric Properties of {LIWC2007}},
  organization = {LIWC.net},
  address      = {Austin, TX},
  year         = {2007},
}
|
||||
% The General Inquirer monograph: it has a publisher and no journal, so it is a book.
@book{stone1966general,
  author    = {Stone, Philip J and Dunphy, Dexter C and Smith, Marshall S},
  title     = {The {General Inquirer}: A Computer Approach to Content Analysis},
  publisher = {MIT Press},
  year      = {1966},
}
|
||||
% Opinion Observer paper (one of the two sources for the Hu-Liu opinion lexicon).
@inproceedings{liu2005opinion,
  author    = {Liu, Bing and Hu, Minqing and Cheng, Junsheng},
  title     = {{Opinion Observer}: Analyzing and Comparing Opinions on the {Web}},
  booktitle = {Proceedings of the 14th International Conference on {World Wide Web}},
  pages     = {342--351},
  year      = {2005},
}
|
||||
% Customer-review mining paper (one of the two sources for the Hu-Liu opinion lexicon).
@inproceedings{hu2004mining,
  author    = {Hu, Minqing and Liu, Bing},
  title     = {Mining and Summarizing Customer Reviews},
  booktitle = {Proceedings of the Tenth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages     = {168--177},
  year      = {2004},
}
|
||||
% Subjectivity word-sense disambiguation (WSD) paper.
@inproceedings{akkaya2009subjectivity,
  author    = {Akkaya, Cem and Wiebe, Janyce and Mihalcea, Rada},
  title     = {Subjectivity Word Sense Disambiguation},
  booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing},
  pages     = {190--199},
  year      = {2009},
}
|
||||
% WordNet reference book. "WordNet" is brace-protected so sentence-casing
% styles do not turn it into "Wordnet".
% NOTE(review): this volume is usually credited to Christiane Fellbaum as
% editor -- confirm the author/editor attribution before submission.
@book{miller1998wordnet,
  author    = {Miller, George A},
  title     = {{WordNet}: An Electronic Lexical Database},
  publisher = {MIT Press},
  year      = {1998},
}
|
||||
% SentiWordNet 3.0 paper. The proceedings name is spelled out; the previous
% volume/number values were citation-export placeholders and are not part of
% the reference.
@inproceedings{baccianella2010sentiwordnet,
  author    = {Baccianella, Stefano and Esuli, Andrea and Sebastiani, Fabrizio},
  title     = {{SentiWordNet} 3.0: An Enhanced Lexical Resource for Sentiment Analysis and Opinion Mining},
  booktitle = {Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC} 2010)},
  pages     = {2200--2204},
  year      = {2010},
}
|
||||
% SenticNet paper. The former volume/number/organization values came from a
% citation export (an indexing site, not the organiser) and carried no
% bibliographic information.
% NOTE(review): page numbers are not recorded here -- add them if available.
@inproceedings{cambria2010senticnet,
  author    = {Cambria, Erik and Speer, Robert and Havasi, Catherine and Hussain, Amir},
  title     = {{SenticNet}: A Publicly Available Semantic Resource for Opinion Mining},
  booktitle = {{AAAI} Fall Symposium: Commonsense Knowledge},
  year      = {2010},
}
|
||||
% ANEW technical report. The report number has its own field, and the
% institution name is written out instead of the truncated, non-ASCII
% export string.
% NOTE(review): the institution is completed as commonly cited (University of
% Florida) -- confirm against the report's title page.
@techreport{bradley1999affective,
  author      = {Bradley, Margaret M and Lang, Peter J},
  title       = {Affective Norms for {English} Words ({ANEW}): Instruction Manual and Affective Ratings},
  type        = {Technical Report},
  number      = {C-1},
  institution = {The Center for Research in Psychophysiology, University of Florida},
  year        = {1999},
}
|
||||
|
||||
Reference in New Issue
Block a user