How to use the textblob.Blobber class in textblob

To help you get started, we've selected a few textblob.Blobber examples, based on how it is commonly used in public projects.

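For orientation, here is a minimal sketch of the pattern the examples below follow: a Blobber is a factory whose TextBlobs all share the same tokenizer, tagger, and other components, so it only has to be configured once. The sample sentence is purely illustrative.

from textblob import Blobber
from textblob.taggers import NLTKTagger

# One Blobber, reused for many documents; every blob it creates
# shares the same NLTKTagger instance.
tb = Blobber(pos_tagger=NLTKTagger())

blob = tb("TextBlob makes part-of-speech tagging simple.")
print(blob.tags)       # list of (word, POS tag) tuples
print(blob.sentences)  # behaves like a plain TextBlob otherwise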

github sloria / TextBlob / tests / test_blob.py
def test_overrider_classifier(self):
        b = tb.Blobber(classifier=classifier)
        blob = b("I am so amazing")
        assert_equal(blob.classify(), 'pos')
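The test above relies on a classifier object defined elsewhere in the test module. As a rough sketch of how such a classifier could be built with textblob's NaiveBayesClassifier (the training data here is made up for illustration):

from textblob import Blobber
from textblob.classifiers import NaiveBayesClassifier

# Hypothetical training set; the real test module defines its own.
train = [
    ("I love this sandwich.", "pos"),
    ("This is an amazing place!", "pos"),
    ("I do not like this restaurant.", "neg"),
    ("I am tired of this stuff.", "neg"),
]
classifier = NaiveBayesClassifier(train)

b = Blobber(classifier=classifier)
blob = b("I am so amazing")
print(blob.classify())  # expected to be 'pos' with training data like this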
github sloria / TextBlob / tests / test_blob.py
def setUp(self):
        self.blobber = tb.Blobber()  # The default blobber
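A default Blobber like this is mainly useful as a shared factory: every blob it produces reuses the same component instances. A small sketch of that sharing:

from textblob import Blobber

tb = Blobber()  # all default components
blob1 = tb("This is one document.")
blob2 = tb("This is another document.")

# Both blobs hold the exact same tagger instance from the factory.
assert blob1.pos_tagger is blob2.pos_tagger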
github esh-b / Feature-based-opinion-mining / FileCreationWithBigrams.py
					reviewContent.append(review)
					review = []
				reviewTitle.append(line.split("[-][t]")[1].rstrip("\r\n"))

			else:
				if "##" in line:
					x = line.split("##")
					#if len(x[0]) != 0:
					for i in range(1, len(x)):
						review.append(x[i].rstrip("\r\n"))
				else:
					continue
		reviewContent.append(review)

		#tb = Blobber(pos_tagger=PerceptronTagger()) 
		tb = Blobber(pos_tagger=NLTKTagger())
		nounScores = dict()

		#Writing to a file
		f = open('modified.txt', 'w')
		for a in range(len(reviewContent)):
			f.write("[t]")
			
			#Finding Bigrams in title
			text = reviewTitle[a]
		
			x = tb(text).tags #NLTK tagger		
			e = 0
				
			while e
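The excerpt is cut off mid-loop; it assumes Blobber and NLTKTagger imports at the top of the file and goes on to scan the tagged title for bigrams. As a generic sketch of that kind of scan over consecutive (word, tag) pairs, not the repository's exact loop:

from textblob import Blobber
from textblob.taggers import NLTKTagger

tb = Blobber(pos_tagger=NLTKTagger())

def noun_bigrams(text):
    """Return consecutive noun-noun pairs from text (illustrative only)."""
    tags = tb(text).tags  # list of (word, POS tag) tuples
    pairs = []
    for e in range(len(tags) - 1):
        word, tag = tags[e]
        next_word, next_tag = tags[e + 1]
        if tag.startswith("NN") and next_tag.startswith("NN"):
            pairs.append((word, next_word))
    return pairs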
github esh-b / Feature-based-opinion-mining / HAC.py
def findFeatures(reviewContent,filename):
	#nounScores is the dict containing nouns from all reviews and their respective scores from HAC algorithm
	nounScores = dict()

	#adjDict dict contains adjective and the corresponding noun which it is assigned to
	adjDict = dict()
	tb = Blobber(pos_tagger = NLTKTagger())

	for a in range(len(reviewContent)):								#Stores the score of the nouns
		for i in range(len(reviewContent[a])):
			text = ' '.join([word for word in reviewContent[a][i].split() if word not in stopwords.words("english")])
			text = ''.join(ch for ch in text if ch not in exclude)
			text = nltk.word_tokenize(text)
			x = nltk.pos_tag(text)

			#Get the noun/adjective words and store it in tagList
			tagList = []
			for e in x:
				if(e[1] == "NN" or e[1] == "JJ"):
					tagList.append(e)
	
			#Add the nouns(which are not in the nounScores dict) to the dict
			for e in tagList:
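Inside the loop the (word, tag) pairs actually come from nltk.pos_tag; the Blobber created above could supply the same information directly. A small equivalent sketch (not the repository's code):

from textblob import Blobber
from textblob.taggers import NLTKTagger

tb = Blobber(pos_tagger=NLTKTagger())

def nouns_and_adjectives(sentence):
    """Keep only NN/JJ-tagged tokens, mirroring the tagList filter above."""
    return [(word, tag) for word, tag in tb(sentence).tags
            if tag in ("NN", "JJ")]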
github viveksck / langchangetrack / langchangetrack / utils / scripts / pos_tag.py
def main(args):
    f = open(args.filename)
    D = {}
    tag_set = set([])
    tb = Blobber(pos_tagger=PerceptronTagger())
    for i, line in enumerate(f):
        b1 = tb(line)
        for w, t in b1.tags:
            tag_set.add(t)
            if w not in D:
                D[w] = Counter()
            D[w][t] = float(D[w][t] + 1)

    sorted_pos_tags = sorted(list(tag_set))
    rows = []
    for w in D.keys():
        row = [w]
        pos_counts_word = np.array([float(D[w][t]) for t in sorted_pos_tags])
        pos_dist_word = pos_counts_word / float(np.sum(pos_counts_word))
        assert(np.isclose(np.sum(pos_dist_word), 1.0))
        row = row + list(pos_dist_word)
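The excerpt assumes a handful of imports that sit above main() in the original file; they are not shown in the snippet, but something along these lines is needed to run it:

# Assumed imports for the excerpt above (exact list is an assumption).
from collections import Counter

import numpy as np
from textblob import Blobber
from textblob_aptagger import PerceptronTagger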
github frnsys / geiger / geiger / text / tokenize.py
import re
from collections import defaultdict
from textblob import Blobber
from textblob_aptagger import PerceptronTagger
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nytnlp.keywords import rake
from nytnlp.util import penn_to_wordnet
from geiger.text.clean import clean_doc
from geiger.knowledge import Bigram


blob = Blobber(pos_tagger=PerceptronTagger())
stops = stopwords.words('english')
lem = WordNetLemmatizer()

import config
bigram = Bigram(remote=config.remote)


def keyword_tokenize(doc):
    """
    Tokenizes a document so that only keywords and phrases
    are returned. Keywords are returned as lemmas.
    """
    doc = clean_doc(doc)
    blo = blob(doc)

    # Only process tokens which are keywords
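The function is cut off at this point; the remainder reduces the blob's tokens to keyword lemmas. A rough, self-contained sketch of that kind of filter, using the module-level blob, stops, and lem defined above and a simplified stand-in for penn_to_wordnet (this is not the geiger implementation):

from nltk.corpus import wordnet

def _penn_to_wordnet(tag):
    """Simplified mapping from Penn Treebank tags to WordNet POS constants."""
    if tag.startswith("NN"):
        return wordnet.NOUN
    if tag.startswith("VB"):
        return wordnet.VERB
    if tag.startswith("JJ"):
        return wordnet.ADJ
    if tag.startswith("RB"):
        return wordnet.ADV
    return None

def keyword_lemmas(doc):
    """Drop stopwords and non-content words, then return lemmas."""
    lemmas = []
    for word, tag in blob(doc).tags:
        pos = _penn_to_wordnet(tag)
        if pos is None or word.lower() in stops:
            continue
        lemmas.append(lem.lemmatize(word.lower(), pos))
    return lemmas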