How to use the scattertext.representations.Word2VecFromParsedCorpus.CorpusAdapterForGensim.get_sentences function in scattertext

To help you get started, we’ve selected a few scattertext examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
def _scan_and_build_vocab(self):
		'''
		Build the vocabulary of self.model from the sentences of self.corpus.

		A preliminary scan_vocab pass is attempted first on a best-effort
		basis; if it fails, build_vocab is still run.
		'''
		try:
			self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
		except Exception:
			# Deliberate best-effort: an ordinary failure of the preliminary scan
			# must not stop the vocabulary build below.  Catching Exception
			# (rather than a bare except) lets KeyboardInterrupt and SystemExit
			# propagate.
			# NOTE(review): presumably guards against scan_vocab differing
			# between gensim versions -- confirm.
			pass
		self.model.build_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
def _scan_and_build_vocab(self):
		'''
		Build the vocabulary of self.model from phrase-transformed sentences.

		A gensim Phrases model is fitted on the sentences of self.corpus and
		applied to them before they are handed to build_vocab.  A preliminary
		scan_vocab pass over the untransformed sentences is attempted first on
		a best-effort basis; if it fails, build_vocab is still run.
		'''
		from gensim.models import Phrases
		bigram_transformer = Phrases(CorpusAdapterForGensim.get_sentences(self.corpus))
		try:
			self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
		except Exception:
			# Deliberate best-effort: an ordinary failure of the preliminary scan
			# must not stop the vocabulary build below.  Catching Exception
			# (rather than a bare except) lets KeyboardInterrupt and SystemExit
			# propagate.
			# NOTE(review): presumably guards against scan_vocab differing
			# between gensim versions -- confirm.
			pass
		self.model.build_vocab(bigram_transformer[CorpusAdapterForGensim.get_sentences(self.corpus)])
github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
def _scan_and_build_vocab(self):
		'''
		Build the vocabulary of self.model from the sentences of self.corpus.

		A preliminary scan_vocab pass is attempted first on a best-effort
		basis; if it fails, build_vocab is still run.
		'''
		try:
			self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
		except Exception:
			# Deliberate best-effort: an ordinary failure of the preliminary scan
			# must not stop the vocabulary build below.  Catching Exception
			# (rather than a bare except) lets KeyboardInterrupt and SystemExit
			# propagate.
			# NOTE(review): presumably guards against scan_vocab differing
			# between gensim versions -- confirm.
			pass
		self.model.build_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
def _scan_and_build_vocab(self):
		'''
		Build the vocabulary of self.model from phrase-transformed sentences.

		A gensim Phrases model is fitted on the sentences of self.corpus and
		applied to them before they are handed to build_vocab.  A preliminary
		scan_vocab pass over the untransformed sentences is attempted first on
		a best-effort basis; if it fails, build_vocab is still run.
		'''
		from gensim.models import Phrases
		bigram_transformer = Phrases(CorpusAdapterForGensim.get_sentences(self.corpus))
		try:
			self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
		except Exception:
			# Deliberate best-effort: an ordinary failure of the preliminary scan
			# must not stop the vocabulary build below.  Catching Exception
			# (rather than a bare except) lets KeyboardInterrupt and SystemExit
			# propagate.
			# NOTE(review): presumably guards against scan_vocab differing
			# between gensim versions -- confirm.
			pass
		self.model.build_vocab(bigram_transformer[CorpusAdapterForGensim.get_sentences(self.corpus)])
github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
def add_phrases(self, corpus):
		'''
		Fit a chain of gensim Phrases models on the sentences of a corpus and
		store them in self.phrases.

		Parameters
		----------
		corpus: ParsedCorpus
		  Corpus whose sentences are used to fit the phrase models.

		Returns
		-------
		self, with self.phrases replaced by the list of fitted Phrases models
		(one initial model plus one per additional level, up to
		self.max_tokens_per_phrase models in total).
		'''
		from gensim.models import Phrases

		# Kept as an assert so the exception type seen by callers is unchanged.
		assert isinstance(corpus, ParsedCorpus)
		# The first-level model is fitted directly on the raw sentences.
		# NOTE(review): only this first model is given delimiter=' '; the
		# higher-level models below use Phrases' default delimiter -- confirm
		# this is intended.
		self.phrases = [Phrases(CorpusAdapterForGensim.get_sentences(corpus), delimiter=' ')]

		# Each further level is fitted on the sentences as transformed by the
		# most recently fitted model.
		for _ in range(1, self.max_tokens_per_phrase):
			self.phrases.append(Phrases(self.phrases[-1][CorpusAdapterForGensim.get_sentences(corpus)]))

		return self
github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
'''
		Parameters
		----------
		corpus: Corpus for phrase augmentation

		Returns
		-------
		New ParsedCorpus containing unigrams in corpus and new phrases
		'''
		from gensim.models import Phrases

		assert isinstance(corpus, ParsedCorpus)
		self.phrases = [Phrases(CorpusAdapterForGensim.get_sentences(corpus), delimiter=' ')]

		for i in range(1, self.max_tokens_per_phrase):
			self.phrases.append(Phrases(self.phrases[-1][CorpusAdapterForGensim.get_sentences(corpus)]))

		return self
github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
def _scan_and_build_vocab(self):
		'''
		Build the vocabulary of self.model from phrase-transformed sentences.

		A gensim Phrases model is fitted on the sentences of self.corpus and
		applied to them before they are handed to build_vocab.  A preliminary
		scan_vocab pass over the untransformed sentences is attempted first on
		a best-effort basis; if it fails, build_vocab is still run.
		'''
		from gensim.models import Phrases
		bigram_transformer = Phrases(CorpusAdapterForGensim.get_sentences(self.corpus))
		try:
			self.model.scan_vocab(CorpusAdapterForGensim.get_sentences(self.corpus))
		except Exception:
			# Deliberate best-effort: an ordinary failure of the preliminary scan
			# must not stop the vocabulary build below.  Catching Exception
			# (rather than a bare except) lets KeyboardInterrupt and SystemExit
			# propagate.
			# NOTE(review): presumably guards against scan_vocab differing
			# between gensim versions -- confirm.
			pass
		self.model.build_vocab(bigram_transformer[CorpusAdapterForGensim.get_sentences(self.corpus)])
github JasonKessler / scattertext / scattertext / representations / Word2VecFromParsedCorpus.py View on Github external
'''
		Parameters
		----------
		epochs : int
		  Number of epochs to train for.  Default is 2000.
		training_iterations : int
			Number of times to repeat training process. Default is training_iterations.

		Returns
		-------
		A trained word2vec model.
		'''

		self._scan_and_build_vocab()
		for _ in range(training_iterations):
			self.model.train(CorpusAdapterForGensim.get_sentences(self.corpus),
			                 total_examples=self.model.corpus_count,
			                 epochs=epochs)
		return self.model