def get_topics_from_terms(self,
terms=None,
num_terms_per_topic=10,
scorer=RankDifference()):
'''
Parameters
----------
terms : list or None
If terms is a list, make these the seed terms for the topics
If None, use the first 30 terms in get_scaled_f_scores_vs_background
num_terms_per_topic : int, default 10
Use this many terms per topic
scorer : TermScorer
Implements get_scores; the default is RankDifference, which tends to work best
Returns
-------
dict: {term: [term1, ...], ...}
'''
topic_model = {}
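A minimal usage sketch for the method above; `corpus` is assumed to be an already-built scattertext Corpus (a TermDocMatrix subclass) and the seed terms are purely illustrative.

seed_topics = corpus.get_topics_from_terms(
    terms=['jobs', 'taxes'],        # hypothetical seed terms
    num_terms_per_topic=10,
    scorer=RankDifference()
)
# Per the docstring, this returns a dict mapping each seed term to a list of
# related terms, e.g. {'jobs': ['jobs', 'unemployment', ...], 'taxes': [...]}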
def _build_square(self, term_doc_matrix, term_ranker, labels, scorer):
self.term_doc_matrix_ = term_doc_matrix
self.term_ranker = term_ranker(term_doc_matrix)
# Default to RankDifference when no scorer is supplied
self.scorer = RankDifference() if scorer is None else scorer
self.axes = self._build_axes(scorer)
self.lexicons = self._build_lexicons()
self._labels = labels
score_transform=stretch_0_to_1,
verbose=verbose
).hide_terms(terms_to_hide)
if default_to_term_comparison:
if topic_model_term_lists is not None:
term_scatter_chart_explorer.inject_metadata_term_lists(topic_model_term_lists)
if metadata_descriptions is not None:
term_scatter_chart_explorer.inject_metadata_descriptions(metadata_descriptions)
if use_metadata:
tdf = corpus.get_metadata_freq_df('')
else:
tdf = corpus.get_term_freq_df('')
scores = RankDifference().get_scores(
tdf[initial_category], tdf[[c for c in corpus.get_categories() if c != initial_category]].sum(axis=1)
)
term_scatter_chart_data = term_scatter_chart_explorer.to_dict(
category=initial_category,
scores=scores,
include_term_category_counts=True,
transform=dense_rank,
**kwargs
)
y_label = initial_category
x_label = 'Not ' + initial_category
color_func = None
show_top_terms = True
show_axes = False
else:
def produce_characteristic_explorer(corpus,
category,
category_name=None,
not_category_name=None,
not_categories=None,
characteristic_scorer=DenseRankCharacteristicness(),
term_ranker=termranking.AbsoluteFrequencyRanker,
term_scorer=RankDifference(),
x_label='Characteristic to Corpus',
y_label=None,
y_axis_labels=None,
scores=None,
vertical_lines=None,
**kwargs):
'''
Parameters
----------
corpus : Corpus
It is highly recommended to use a stoplisted, unigram corpus, e.g. `corpus.get_stoplisted_unigram_corpus()`
category : str
category_name : str
not_category_name : str
not_categories : list
characteristic_scorer : CharacteristicScorer
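A hedged example call for produce_characteristic_explorer; `corpus` is assumed to be built as in the demo further down, only parameters shown in the signature above are used, and the output file name is illustrative.

html = produce_characteristic_explorer(
    corpus,
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican'
)
open('./demo_characteristic.html', 'wb').write(html.encode('utf-8'))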
def _get_default_scores(self, category, other_categories, df):
category_column_name = category + ' freq'
cat_word_counts = df[category_column_name]
not_cat_word_counts = df[[c + ' freq' for c in other_categories]].sum(axis=1)
# scores = ScaledFScore.get_scores(cat_word_counts, not_cat_word_counts)
scores = RankDifference().get_scores(cat_word_counts, not_cat_word_counts)
return scores
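The same scoring can be reproduced outside the class on a plain term-frequency table; the '<category> freq' column naming follows the convention above, and the toy counts are assumptions for illustration.

import pandas as pd
from scattertext import RankDifference

df = pd.DataFrame({'democrat freq': [10, 2, 5],
                   'republican freq': [1, 8, 5]},
                  index=['jobs', 'taxes', 'economy'])
scores = RankDifference().get_scores(df['democrat freq'], df['republican freq'])
# One score per term, positionally aligned with df's index; higher values mark
# terms whose frequency rank in 'democrat freq' exceeds their rank in the other column.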
# Assumed imports for this demo (top-level scattertext exports; dense_rank's
# module path is an assumption):
from scattertext import (CorpusFromPandas, AssociationCompactor, RankDifference,
                         SampleCorpora, produce_scattertext_explorer,
                         whitespace_nlp_with_sentences)
from scattertext.Scalers import dense_rank

convention_df = SampleCorpora.ConventionData2012.get_data()

corpus = CorpusFromPandas(
convention_df,
category_col='party',
text_col='text',
nlp=whitespace_nlp_with_sentences
).build().get_unigram_corpus().compact(AssociationCompactor(4000))
html = produce_scattertext_explorer(
corpus,
category='democrat',
category_name='Democratic',
not_category_name='Republican',
minimum_term_frequency=0,
pmi_threshold_coefficient=0,
width_in_pixels=1000,
metadata=convention_df['speaker'],
term_scorer=RankDifference(),
transform=dense_rank
)
open('./demo_dense_rank.html', 'wb').write(html.encode('utf-8'))
print('Open ./demo_dense_rank.html in Chrome or Firefox.')
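In this demo, RankDifference supplies the per-term association scores, while passing transform=dense_rank positions terms by dense frequency rank rather than raw frequency, which spreads out the many low-frequency terms that would otherwise crowd the lower-left corner of the plot.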
projector=lambda n_terms, n_dims: CategoryProjector(
selector=AssociationCompactor(n_terms, scorer=RankDifference),
projector=PCA(n_dims)),
optimizer = morista_index,
def __init__(self,
weighter=LengthNormalizer(),
normalizer=StandardScaler(),
selector=AssociationCompactor(1000, RankDifference),
projector=PCA(2)):
'''
:param weighter: instance of an sklearn class with fit_transform to weight the term X category corpus.
:param normalizer: instance of an sklearn class with fit_transform to normalize term X category corpus.
:param selector: instance of a compactor class, if None, no compaction will be done.
:param projector: instance of an sklearn class with fit_transform, used to project the term X category corpus.
'''
self.weighter_ = weighter
self.normalizer_ = normalizer
self.selector_ = selector
self.projector_ = projector
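A hedged construction sketch for CategoryProjector; the argument values are illustrative variations on the documented defaults, PCA comes from sklearn.decomposition, and the top-level scattertext export of CategoryProjector is an assumption.

from sklearn.decomposition import PCA
from scattertext import AssociationCompactor, RankDifference, CategoryProjector  # export path assumed

category_projector = CategoryProjector(
    selector=AssociationCompactor(2000, RankDifference),  # keep the 2,000 most associated terms
    projector=PCA(2)                                      # project categories into two dimensions
)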