Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@requires_nltk_corpus
def train(self):
    """Fit a Naive Bayes classifier on NLTK's movie review corpus.

    Calls the parent ``train()`` first, then extracts features from every
    review file in the ``'neg'`` and ``'pos'`` categories (negatives first)
    using ``self.feature_extractor`` and stores the trained classifier on
    ``self._classifier``.
    """
    super(NaiveBayesAnalyzer, self).train()
    reviews = nltk.corpus.movie_reviews
    # Look up the file ids of both categories before extracting any features.
    ids_by_label = [(label, reviews.fileids(label)) for label in ('neg', 'pos')]
    labeled_feats = []
    for label, file_ids in ids_by_label:
        for file_id in file_ids:
            words = reviews.words(fileids=[file_id])
            labeled_feats.append((self.feature_extractor(words), label))
    self._classifier = nltk.classify.NaiveBayesClassifier.train(labeled_feats)
@requires_nltk_corpus
def pluralize(self):
    """Return the plural version of the word as a Word.

    Not implemented here: calling this always raises.

    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError()
@requires_nltk_corpus
def lemma(self):
    """Return the lemma of this word.

    Delegates to ``self.lemmatize``, passing this word's own ``pos_tag``
    as the part of speech.
    """
    part_of_speech = self.pos_tag
    return self.lemmatize(pos=part_of_speech)
@requires_nltk_corpus
def tokenize(self, text):
    """Split `text` into sentences.

    :param text: The string to split.
    :returns: Whatever ``nltk.tokenize.sent_tokenize`` produces for `text`
        (a list of sentences).
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    return sentences
@requires_nltk_corpus
def singularize(self):
    """Return the singular version of the word as a Word.

    Not implemented here: calling this always raises.

    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError()
@requires_nltk_corpus
def tag(self, text, tokenize=True):
    """Part-of-speech tag `text`.

    :param text: The input to tag. When `tokenize` is false it is handed
        to the tagger unchanged, so it should already be tokenized.
    :param tokenize: If true, `text` is first split with ``word_tokenize``
        and the tokens are collected into a list.
    :returns: The result of ``nltk.tag.pos_tag`` on the tokens.
    """
    tokens = list(word_tokenize(text)) if tokenize else text
    return nltk.tag.pos_tag(tokens)
@requires_nltk_corpus
def train(self):
    """Load Brown training data and build the regular-expression tagger.

    Reads the tagged sentences of the Brown corpus ``news`` category and
    constructs an ``nltk.RegexpTagger`` from an ordered list of
    ``(pattern, tag)`` pairs.

    NOTE(review): in the code visible here neither ``train_data`` nor
    ``regexp_tagger`` is used or stored afterwards -- confirm whether
    further training steps are missing from this method.
    """
    train_data = nltk.corpus.brown.tagged_sents(categories='news')
    # The order of the patterns is kept exactly as written; the catch-all
    # '.*' entry must stay last so the more specific patterns get a chance.
    regexp_tagger = nltk.RegexpTagger([
        # Optionally signed integer or decimal number. The dot is escaped so
        # that only a literal decimal point separates the two digit groups.
        (r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),
        # A dash, colon or semicolon at the end of the token.
        (r'(-|:|;)$', ':'),
        # Zero or more apostrophes up to the end of the token.
        (r'\'*$', 'MD'),
        # English articles.
        (r'(The|the|A|a|An|an)$', 'AT'),
        (r'.*able$', 'JJ'),
        # Anything starting with an uppercase ASCII letter.
        (r'^[A-Z].*$', 'NNP'),
        (r'.*ness$', 'NN'),
        (r'.*ly$', 'RB'),
        (r'.*s$', 'NNS'),
        (r'.*ing$', 'VBG'),
        (r'.*ed$', 'VBD'),
        # Fallback: everything else.
        (r'.*', 'NN'),
    ])
@requires_nltk_corpus
def lemmatize(self, pos=None):
    """Return the lemma for a word (not implemented in this variant).

    The body only raises; the sketch kept in the comment below shows a
    disabled implementation based on ``nltk.stem.WordNetLemmatizer``.

    :param pos: Part of speech to filter upon. Unused here; the disabled
        implementation would default it to ``_wordnet.NOUN`` when `None`.
    :raises NotImplementedError: unconditionally.

    .. versionadded:: 0.8.1 (``textblob``)
    """
    # Disabled implementation, kept for reference:
    #     if pos is None:
    #         pos = _wordnet.NOUN
    #     lemmatizer = nltk.stem.WordNetLemmatizer()
    #     return lemmatizer.lemmatize(self.string, pos)
    raise NotImplementedError()
@requires_nltk_corpus
def lemmatize(self, pos=None):
    """Return the lemma of ``self.string`` via NLTK's WordNetLemmatizer.

    :param pos: Part of speech to filter upon. If `None`, defaults to
        ``_wordnet.NOUN``. A value found among the keys of
        ``_wordnet._FILEMAP`` is used as-is; any other value is converted
        with ``_penn_to_wordnet`` (presumably from a Penn Treebank tag --
        confirm against that helper).
    :returns: The result of ``WordNetLemmatizer.lemmatize`` for the word.

    .. versionadded:: 0.8.1
    """
    if pos is None:
        wordnet_pos = _wordnet.NOUN
    elif pos not in _wordnet._FILEMAP.keys():
        # Not a WordNet tag already, so translate it first.
        wordnet_pos = _penn_to_wordnet(pos)
    else:
        wordnet_pos = pos
    return nltk.stem.WordNetLemmatizer().lemmatize(self.string, wordnet_pos)
@requires_nltk_corpus
def train(self):
    """Create the perceptron tagger and mark this object as trained.

    Stores a new ``nltk.PerceptronTagger()`` on ``self.tagger`` and sets
    ``self._trained`` to ``True``. No corpus is read here.

    :returns: ``None``.
    """
    # Disabled:
    # train_data = nltk.corpus.brown.tagged_sents(categories=['news','science_fiction'])
    tagger = nltk.PerceptronTagger()
    self.tagger = tagger
    self._trained = True