How to use scattertext.SampleCorpora in scattertext

To help you get started, we’ve selected a few scattertext examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github JasonKessler / scattertext / demo_general_inquirer.py View on Github external
def main():
	convention_df = SampleCorpora.ConventionData2012.get_data()
	feat_builder = FeatsFromGeneralInquirer()
	corpus = CorpusFromPandas(convention_df,
	                          category_col='party',
	                          text_col='text',
	                          nlp=whitespace_nlp_with_sentences,
	                          feats_from_spacy_doc=feat_builder).build()
	html = produce_scattertext_explorer(corpus,
	                                    category='democrat',
	                                    category_name='Democratic',
	                                    not_category_name='Republican',
	                                    width_in_pixels=1000,
	                                    metadata=convention_df['speaker'],
	                                    use_non_text_features=True,
	                                    use_full_doc=True,
	                                    topic_model_term_lists=feat_builder.get_top_model_term_lists(),
										metadata_descriptions=feat_builder.get_definitions()
github JasonKessler / scattertext / demo_bow_pca.py View on Github external
from sklearn.decomposition import TruncatedSVD

import scattertext as st
from scattertext import ClassPercentageCompactor, CSRMatrixFactory
from scattertext.representations.CorpusSentenceIterator import CorpusSentenceIterator

# Fetch the bundled ConventionData2012 sample data and add a 'parse' column
# by applying st.whitespace_nlp_with_sentences to every entry of 'text'.
convention_df = st.SampleCorpora.ConventionData2012.get_data()
convention_df['parse'] = convention_df['text'].apply(
    st.whitespace_nlp_with_sentences
)

# Build the corpus from the pre-parsed column, using 'party' as the category.
corpus = st.CorpusFromParsedDocuments(
    convention_df,
    category_col='party',
    parsed_col='parse',
).build()
# Reduce to the stoplisted unigram corpus, then apply the
# ClassPercentageCompactor(term_count=3) selection.
corpus = corpus.get_stoplisted_unigram_corpus()
corpus = corpus.select(ClassPercentageCompactor(term_count=3))


html = st.produce_projection_explorer(corpus,
                                      embeddings=corpus.get_term_doc_mat(),
                                      projection_model=TruncatedSVD(n_components=30, n_iter=10),
                                      x_dim=0,
                                      y_dim=1,
                                      category='democrat',
                                      category_name='Democratic',
github JasonKessler / scattertext / demo_pair_plot_convention_empath.py View on Github external
import scattertext as st
import scattertext.categoryprojector.pairplot

# Fetch the bundled ConventionData2012 sample data and create the
# Empath-only feature builder used for both the corpus and the plot.
convention_df = st.SampleCorpora.ConventionData2012.get_data()
empath_feature_builder = st.FeatsFromOnlyEmpath()

# Build a corpus whose categories come from the 'speaker' column, with
# document features supplied by the Empath feature builder.
corpus = st.CorpusFromPandas(
    convention_df,
    category_col='speaker',
    text_col='text',
    nlp=st.whitespace_nlp_with_sentences,
    feats_from_spacy_doc=empath_feature_builder,
).build()
corpus = corpus.get_unigram_corpus()

# Produce the pair plot; each metadata entry is "<party>: <speaker>".
html = scattertext.categoryprojector.pairplot.produce_pairplot(
    corpus,
    use_metadata=True,
    category_projector=st.CategoryProjector(selector=None),
    topic_model_term_lists=empath_feature_builder.get_top_model_term_lists(),
    metadata=convention_df['party'] + ': ' + convention_df['speaker'],
)

file_name = 'convention_pair_plot_empath.html'