This commit is contained in:
wea_ondara
2020-11-03 16:47:32 +01:00
parent 544d2d3476
commit defef8383d
3 changed files with 152 additions and 2 deletions

View File

@@ -205,6 +205,86 @@ Quality also depends on the type of platform. \cite{lin2017better} showed that e
% A comprehensive survey and classification of approaches for community question answering \cite{srba2016comprehensive}, meta study on papers published between 2005 and 2014
\subsection{Analysis}
%general blabla
% sentiment intensity (Valence based), lexical features
% sentiment analysis: there are 10-15 methods,
% alle sentiment methoden + vader
\subsubsection{Sentiment analysis}
%challenges (vader)
% - coverage (e.g. of lexical features, important in microblog texts)
% - sentiment intensity (some of the following tools ignore intensity completely (just -1 or 1))
% - creating a human-validated gold standard lexicon is very time consuming/labor intensive, with sentiment valence scores, feature detection and context awareness,
%%%%% handcrafted
%liwc (Linguistic Inquiry and Word Count) \cite{pennebaker2001linguistic,pennebakerdevelopment}, 2001
% - misses acronyms, initialisms, emoticons, and slang, which are known to be important for sentiment analysis of social text (vader)
% - cannot recognise sentiment intensity (all words have an equal weight) (vader)
% - ca 4500 words (uptodate?), ca 400 pos words, ca 500 neg words, lexicon proprietary (vader)
% ...
%General Inquirer (GI) \cite{stone1966general} 1966
% - 11k words, 1900 pos, 2300 neg, all approx
% - very old (1966), continuously refined, still in use (vader)
% - misses lexical feature detection (acronyms, ...) and sentiment intensity (vader)
%Hu-Liu04 \cite{hu2004mining,liu2005opinion}, 2004
% - 6800 words, 2000 pos, 4800 neg, all approx values (vader)
% - better suited for social media text, misses acronyms/initialisms (vader)
% - bootstrapped from wordnet (well-known English lexical database) (vader)
%Word-Sense Disambiguation (WSD) \cite{akkaya2009subjectivity}, 2009
% - TODO
%wordnet \cite{miller1998wordnet} 1998
% - TODO
%sentiwordnet \cite{baccianella2010sentiwordnet}
% - TODO
%ANEW (Affective Norms for English Words) \cite{bradley1999affective}
% - TODO
%SenticNet \cite{cambria2010senticnet}
% - TODO
%%%%% automated (machine learning)
%often require large training sets, compare to creating a lexicon (vader)
%training data must represent as many features as possible, otherwise feature is not learned, often not the case (vader)
%very cpu and memory intensive, slow, compare to lexicon-based (vader)
%derived features are not comprehensible to a human (black-box) (vader)
%generalization problem (vader)
%updating (extend/modify) hard (e.g. new domain) (vader)
% naive bayes
% - simple (vader)
% - assumption: feature probabilities are independent of each other (vader)
% Maximum Entropy
% - exponential model + logistic regression (vader)
% - feature weighting through not assuming independence as in naive bayes (vader)
%svm
%- mathematically demanding (vader)
%- separates data points using hyperplanes (vader)
%- long training period (other methods do not need training at all because lexica) (vader)
%vader (Valence Aware Dictionary for sEntiment Reasoning)(grob) \cite{hutto2014vader}
% - 2014
% - detects acronyms, ...
% - sentiment intensity
% - not just 1 and -1 for pos and neg but value in a range
% - context awareness
% - disambiguation of words if they have multiple meanings (contextual meaning)
%general
%dep on sentiment lexicons, more info in vader 2.1 Sentiment Lexicons
%vader not binary (pos, neg) but 3 categories
% its
% original ITS paper; how was this done previously (before ITS)
\subsubsection{Trend analysis}