textblob.en.sentiments — TextBlob 0.19.0 documentation (original) (raw)

Source code for textblob.en.sentiments

"""Sentiment analysis implementations.

.. versionadded:: 0.5.0 """ from collections import namedtuple

import nltk

from textblob.base import CONTINUOUS, DISCRETE, BaseSentimentAnalyzer from textblob.decorators import requires_nltk_corpus from textblob.en import sentiment as pattern_sentiment from textblob.tokenizers import word_tokenize

class PatternAnalyzer(BaseSentimentAnalyzer):
    """Sentiment analyzer that uses the same implementation as the
    pattern library. Returns results as a named tuple of the form:

    ``Sentiment(polarity, subjectivity, [assessments])``

    where [assessments] is a list of the assessed tokens and their
    polarity and subjectivity scores
    """

    kind = CONTINUOUS
    # This is only here for backwards-compatibility.
    # The return type is actually determined upon calling analyze()
    RETURN_TYPE = namedtuple("Sentiment", ["polarity", "subjectivity"])

    def analyze(self, text, keep_assessments=False):
        """Return the sentiment as a named tuple of the form:
        ``Sentiment(polarity, subjectivity, [assessments])``.

        :param text: The text handed to ``pattern_sentiment``.
        :param bool keep_assessments: If true, the returned tuple carries a
            third ``assessments`` field taken from the ``pattern_sentiment``
            result; otherwise only ``polarity`` and ``subjectivity`` are
            returned.
        """
        # Run the pattern analysis exactly once; the same result object
        # supplies both the (polarity, subjectivity) pair and, when
        # requested, the assessments.
        result = pattern_sentiment(text)

        #: Return type declaration
        if keep_assessments:
            Sentiment = namedtuple(
                "Sentiment", ["polarity", "subjectivity", "assessments"]
            )
            polarity, subjectivity = result
            return Sentiment(polarity, subjectivity, result.assessments)

        Sentiment = namedtuple("Sentiment", ["polarity", "subjectivity"])
        return Sentiment(*result)

def _default_feature_extractor(words): """Default feature extractor for the NaiveBayesAnalyzer.""" return dict((word, True) for word in words)

class NaiveBayesAnalyzer(BaseSentimentAnalyzer):
    """Naive Bayes analyzer that is trained on a dataset of movie reviews.
    Returns results as a named tuple of the form:
    ``Sentiment(classification, p_pos, p_neg)``

    :param callable feature_extractor: Function that returns a dictionary of
        features, given a list of words.
    """

    kind = DISCRETE
    #: Return type declaration
    RETURN_TYPE = namedtuple("Sentiment", ["classification", "p_pos", "p_neg"])

    def __init__(self, feature_extractor=_default_feature_extractor):
        super().__init__()
        # Set by train(); stays None until the classifier has been trained.
        self._classifier = None
        self.feature_extractor = feature_extractor

    @requires_nltk_corpus
    def train(self):
        """Train the Naive Bayes classifier on the movie review corpus."""
        super().train()
        reviews = nltk.corpus.movie_reviews
        # One (features, label) pair per review file: all "neg" reviews
        # first, then all "pos" reviews.
        train_data = [
            (self.feature_extractor(reviews.words(fileids=[fileid])), label)
            for label in ("neg", "pos")
            for fileid in reviews.fileids(label)
        ]
        self._classifier = nltk.classify.NaiveBayesClassifier.train(train_data)

    def analyze(self, text):
        """Return the sentiment as a named tuple of the form:
        ``Sentiment(classification, p_pos, p_neg)``

        ``classification`` is the most probable label; ``p_pos`` and
        ``p_neg`` are the probabilities of the "pos" and "neg" labels.
        """
        # Lazily train the classifier
        super().analyze(text)
        tokens = word_tokenize(text, include_punc=False)
        # Lowercase and drop tokens shorter than three characters. Build a
        # real list (not a one-shot generator) so that custom feature
        # extractors receive the "list of words" promised in the class
        # docstring and may call len(), index, or iterate more than once.
        words = [token.lower() for token in tokens if len(token) >= 3]
        feats = self.feature_extractor(words)
        prob_dist = self._classifier.prob_classify(feats)
        return self.RETURN_TYPE(
            classification=prob_dist.max(),
            p_pos=prob_dist.prob("pos"),
            p_neg=prob_dist.prob("neg"),
        )