How to use the scattertext.termscoring.RankDifference.RankDifference class in scattertext

To help you get started, we’ve selected a few scattertext examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github JasonKessler / scattertext / scattertext / topicmodel / SentencesForTopicModeling.py View on Github external
def get_topics_from_terms(self,
	                          terms=None,
	                          num_terms_per_topic=10,
	                          scorer=RankDifference()):
		'''
		Build a topic model that maps each seed term to a list of terms.

		Parameters
		----------
		terms : list or None
			If terms is a list, make these the seed terms for the topics
			If None, use the first 30 terms in get_scaled_f_scores_vs_background
		num_terms_per_topic : int, default 10
			Use this many terms per topic
		scorer : TermScorer
			Implements get_scores, default is RankDifference, which tends to work best.
			Note: the default instance is created once, when this method is defined,
			and is therefore shared by every call that omits this argument.

		Returns
		-------
		dict: {term: [term1, ...], ...}
		'''
		# Accumulator for the {seed term: [terms]} mapping described above.
		topic_model = {}
github JasonKessler / scattertext / scattertext / semioticsquare / SemioticSquare.py View on Github external
def _build_square(self, term_doc_matrix, term_ranker, labels, scorer):
        '''
        Populate the instance state that backs the square.

        Parameters
        ----------
        term_doc_matrix : object
            Stored as-is on ``self.term_doc_matrix_``.
        term_ranker : callable
            Called with ``term_doc_matrix``; the result is kept on ``self.term_ranker``.
        labels : object
            Stored as-is on ``self._labels``.
        scorer : object or None
            Kept on ``self.scorer``; a fresh ``RankDifference()`` is used when None.

        Returns
        -------
        None
        '''
        self.term_doc_matrix_ = term_doc_matrix
        self.term_ranker = term_ranker(term_doc_matrix)

        # Fall back to RankDifference only when the caller supplied no scorer.
        if scorer is None:
            self.scorer = RankDifference()
        else:
            self.scorer = scorer

        # NOTE(review): the raw ``scorer`` argument (possibly None) is forwarded
        # here rather than ``self.scorer`` -- confirm _build_axes handles None.
        self.axes = self._build_axes(scorer)

        # Lexicons are built after the axes have been assigned.
        self.lexicons = self._build_lexicons()
        self._labels = labels
github JasonKessler / scattertext / scattertext / categoryprojector / pairplot.py View on Github external
score_transform=stretch_0_to_1,
        verbose=verbose
    ).hide_terms(terms_to_hide)

    if default_to_term_comparison:
        if topic_model_term_lists is not None:
            term_scatter_chart_explorer.inject_metadata_term_lists(topic_model_term_lists)
        if metadata_descriptions is not None:
            term_scatter_chart_explorer.inject_metadata_descriptions(metadata_descriptions)

        if use_metadata:
            tdf = corpus.get_metadata_freq_df('')
        else:
            tdf = corpus.get_term_freq_df('')

        scores = RankDifference().get_scores(
            tdf[initial_category], tdf[[c for c in corpus.get_categories() if c != initial_category]].sum(axis=1)
        )

        term_scatter_chart_data = term_scatter_chart_explorer.to_dict(
            category=initial_category,
            scores=scores,
            include_term_category_counts=True,
            transform=dense_rank,
            **kwargs
        )
        y_label = initial_category,
        x_label = 'Not ' + initial_category,
        color_func = None
        show_top_terms = True
        show_axes = False
    else:
github JasonKessler / scattertext / scattertext / __init__.py View on Github external
def produce_characteristic_explorer(corpus,
                                    category,
                                    category_name=None,
                                    not_category_name=None,
                                    not_categories=None,
                                    characteristic_scorer=DenseRankCharacteristicness(),
                                    term_ranker=termranking.AbsoluteFrequencyRanker,
                                    term_scorer=RankDifference(),
                                    x_label='Characteristic to Corpus',
                                    y_label=None,
                                    y_axis_labels=None,
                                    scores=None,
                                    vertical_lines=None,
                                    **kwargs):
    '''
    Parameters
    ----------
    corpus : Corpus
        It is highly recommended to use a stoplisted, unigram corpus-- `corpus.get_stoplisted_unigram_corpus()`
    category : str
    category_name : str
    not_category_name : str
    not_categories : list
    characteristic_scorer : CharacteristicScorer
github JasonKessler / scattertext / scattertext / ScatterChart.py View on Github external
def _get_default_scores(self, category, other_categories, df):
        '''
        Score terms for ``category`` against ``other_categories`` with RankDifference.

        Parameters
        ----------
        category : str
            Name whose ``'<category> freq'`` column of ``df`` supplies the
            in-category counts.
        other_categories : iterable of str
            Names whose ``'<name> freq'`` columns of ``df`` are summed row-wise
            to form the out-of-category counts.
        df : DataFrame-like
            Indexed by column name; must contain the ``' freq'`` columns above.

        Returns
        -------
        Whatever ``RankDifference().get_scores`` returns for the two count vectors.
        '''
        in_category_counts = df[category + ' freq']
        other_columns = [name + ' freq' for name in other_categories]
        # axis=1 sums across the selected columns, giving one total per row.
        out_of_category_counts = df[other_columns].sum(axis=1)
        return RankDifference().get_scores(in_category_counts, out_of_category_counts)
github JasonKessler / scattertext / demo_dense_rank.py View on Github external
convention_df,
    category_col='party',
    text_col='text',
    nlp=whitespace_nlp_with_sentences
).build().get_unigram_corpus().compact(AssociationCompactor(4000))

# Render the explorer page: terms are scored with RankDifference and the
# dense_rank transform is applied.
html = produce_scattertext_explorer(
    corpus,
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',
    minimum_term_frequency=0,
    pmi_threshold_coefficient=0,
    width_in_pixels=1000,
    metadata=convention_df['speaker'],
    term_scorer=RankDifference(),
    transform=dense_rank
)

# Write the page as UTF-8 bytes. The context manager guarantees the handle is
# flushed and closed even if the write raises, instead of relying on garbage
# collection of an anonymous file object.
with open('./demo_dense_rank.html', 'wb') as out_file:
    out_file.write(html.encode('utf-8'))
print('Open ./demo_dense_rank.html in Chrome or Firefox.')
github JasonKessler / scattertext / scattertext / categoryprojector / OptimalProjection.py View on Github external
projector=lambda n_terms, n_dims: CategoryProjector(
            selector=AssociationCompactor(n_terms, scorer=RankDifference),
            projector=PCA(n_dims)),
        optimizer = morista_index,
github JasonKessler / scattertext / scattertext / categoryprojector / CategoryProjector.py View on Github external
def __init__(self,
                 weighter=LengthNormalizer(),
                 normalizer=StandardScaler(),
                 selector=AssociationCompactor(1000, RankDifference),
                 projector=PCA(2)):
        '''
        Store the four pipeline components on the instance.

        NOTE: the default component instances are created once, when this
        method is defined, so every object constructed with the defaults
        shares the same weighter/normalizer/selector/projector objects.

        :param weighter: instance of an sklearn class with fit_transform to weight X category corpus.
        :param normalizer: instance of an sklearn class with fit_transform to normalize term X category corpus.
        :param selector: instance of a compactor class; if None, no compaction will be done.
        :param projector: instance of an sklearn class with fit_transform
        '''
        # Components are kept exactly as passed in; nothing is copied or fitted here.
        self.weighter_, self.normalizer_ = weighter, normalizer
        self.selector_, self.projector_ = selector, projector