Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main():
    """Build the General Inquirer explorer for the 2012 convention sample data.

    Loads ``SampleCorpora.ConventionData2012``, builds a corpus whose
    features are produced by ``FeatsFromGeneralInquirer``, and passes it to
    ``produce_scattertext_explorer`` with the 'democrat' category labelled
    "Democratic" and everything else labelled "Republican".

    Returns:
        Whatever ``produce_scattertext_explorer`` returns (presumably the
        explorer's HTML markup -- confirm against the scattertext docs).
    """
    convention_df = SampleCorpora.ConventionData2012.get_data()
    feat_builder = FeatsFromGeneralInquirer()

    # Documents are grouped by the 'party' column; features come from the
    # General Inquirer feature builder.
    corpus = CorpusFromPandas(
        convention_df,
        category_col='party',
        text_col='text',
        nlp=whitespace_nlp_with_sentences,
        feats_from_spacy_doc=feat_builder,
    ).build()

    # The same feature builder also supplies the topic term lists and the
    # metadata descriptions passed to the explorer.
    html = produce_scattertext_explorer(
        corpus,
        category='democrat',
        category_name='Democratic',
        not_category_name='Republican',
        width_in_pixels=1000,
        metadata=convention_df['speaker'],
        use_non_text_features=True,
        use_full_doc=True,
        topic_model_term_lists=feat_builder.get_top_model_term_lists(),
        metadata_descriptions=feat_builder.get_definitions(),
    )

    # NOTE(review): the excerpt this function was recovered from stops right
    # after the call above. The result is returned so it is not silently
    # discarded -- confirm whether it should also be written to a file.
    return html
from sklearn.decomposition import TruncatedSVD
import scattertext as st
from scattertext import ClassPercentageCompactor, CSRMatrixFactory
from scattertext.representations.CorpusSentenceIterator import CorpusSentenceIterator
# Projection explorer for the 2012 convention sample data.
convention_df = st.SampleCorpora.ConventionData2012.get_data()

# Parse every speech up front; the corpus below is built from the 'parse'
# column rather than from the raw text.
convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences)

# Build the corpus by party, reduce it to the stoplisted unigram corpus, then
# apply ClassPercentageCompactor(term_count=3).
# NOTE(review): presumably this drops rarely used terms -- confirm the exact
# semantics of the compactor against the scattertext docs.
corpus = (
    st.CorpusFromParsedDocuments(
        convention_df,
        category_col='party',
        parsed_col='parse',
    )
    .build()
    .get_stoplisted_unigram_corpus()
    .select(ClassPercentageCompactor(term_count=3))
)

# The corpus's term-document matrix is passed as the embeddings and projected
# with a 30-component truncated SVD; x_dim/y_dim pick components 0 and 1.
# NOTE(review): the excerpt this was recovered from was cut off after
# `category_name`; the call is closed here with only the arguments that were
# visible -- check whether further keyword arguments are needed.
html = st.produce_projection_explorer(
    corpus,
    embeddings=corpus.get_term_doc_mat(),
    projection_model=TruncatedSVD(n_components=30, n_iter=10),
    x_dim=0,
    y_dim=1,
    category='democrat',
    category_name='Democratic',
)
import scattertext as st
import scattertext.categoryprojector.pairplot
# Pair plot of the 2012 convention sample data using Empath-only features.
file_name = 'convention_pair_plot_empath.html'

convention_df = st.SampleCorpora.ConventionData2012.get_data()
empath_feature_builder = st.FeatsFromOnlyEmpath()

# Each speaker is treated as its own category.
corpus = st.CorpusFromPandas(
    convention_df,
    category_col='speaker',
    text_col='text',
    nlp=st.whitespace_nlp_with_sentences,
    feats_from_spacy_doc=empath_feature_builder,
).build()
corpus = corpus.get_unigram_corpus()

# Per-document label of the form "<party>: <speaker>".
speaker_labels = convention_df['party'] + ': ' + convention_df['speaker']

html = scattertext.categoryprojector.pairplot.produce_pairplot(
    corpus,
    use_metadata=True,
    category_projector=st.CategoryProjector(selector=None),
    topic_model_term_lists=empath_feature_builder.get_top_model_term_lists(),
    metadata=speaker_labels,
)