How to use the scattertext.termranking.AbsoluteFrequencyRanker function in scattertext

To help you get started, we’ve selected a few scattertext examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github JasonKessler / scattertext / scattertext / characteristic / DenseRankCharacteristicness.py View on Github external
def __init__(self,
	             term_ranker=AbsoluteFrequencyRanker,
	             background_frequencies=DefaultBackgroundFrequencies,
	             rerank_ranks=False):
		'''
		Store the constructor arguments on the instance.

		Parameters
		----------
		term_ranker : TermRanker, default is AbsoluteFrequencyRanker
			Kept as passed on ``self.term_ranker``.
		background_frequencies : BackgroundFrequencies, default is DefaultBackgroundFrequencies
			Kept as passed on ``self.background_frequencies``.
		rerank_ranks : bool, False by default
			Orders scores from 0 to 1 by their dense rank.
		'''
		# Targets are bound left to right: term_ranker, background_frequencies, rerank_ranks.
		self.term_ranker, self.background_frequencies, self.rerank_ranks = (
			term_ranker, background_frequencies, rerank_ranks
		)
github JasonKessler / scattertext / scattertext / semioticsquare / FourSquare.py View on Github external
def __init__(self,
	             term_doc_matrix,
	             category_a_list,
	             category_b_list,
	             not_category_a_list,
	             not_category_b_list,
	             labels=None,
	             term_ranker=AbsoluteFrequencyRanker,
	             scorer=None):
		'''
		Parameters
		----------
		term_doc_matrix : TermDocMatrix
			TermDocMatrix (or descendant) which will be used in constructing square.
		category_a_list : list
			Category names for term A
		category_b_list : list
			Category names for term B (in opposition to A)
		not_category_a_list : list
			List of category names that belong to not A
		not_category_b_list : list
			List of category names that belong to not B
		labels : dict
			None by default. Labels are dictionary of {'a_and_b': 'A and B', ...} to be shown
github JasonKessler / scattertext / scattertext / ScatterChartData.py View on Github external
def __init__(self,
	             minimum_term_frequency=3,
	             minimum_not_category_term_frequency=0,
	             jitter=None,
	             seed=0,
	             pmi_threshold_coefficient=3,
	             max_terms=None,
	             filter_unigrams=False,
	             term_ranker=AbsoluteFrequencyRanker,
	             use_non_text_features=False,
	             term_significance=None,
	             terms_to_include=None,
				 score_transform=percentile_min):
		'''

		Parameters
		----------
		term_doc_matrix : TermDocMatrix
			The term doc matrix to use for the scatter chart.
		minimum_term_frequency : int, optional
			Minimum times an ngram has to be seen to be included. Default is 3.
		minimum_not_category_term_frequency : int, optional
		  If an n-gram does not occur in the category, minimum times it
		   must be seen to be included. Default is 0.
		jitter : float, optional
github JasonKessler / scattertext / scattertext / termcompaction / CompactTerms.py View on Github external
def __init__(self,
	             term_ranker=AbsoluteFrequencyRanker,
	             minimum_term_count=0,
	             slack=1):
		'''
		Store the term ranker and the minimum term count on the instance.

		Parameters
		----------
		term_ranker : TermRanker
			Default AbsoluteFrequencyRanker. Kept as passed on ``self.term_ranker``.
		minimum_term_count : int
			Default 0. Kept as passed on ``self.minimum_term_count``.
		slack : int
			Default 1.
			NOTE(review): not stored or used in the lines visible here -- the
			excerpt may be cut short; confirm against the full method.

		'''
		self.term_ranker = term_ranker
		self.minimum_term_count = minimum_term_count
github JasonKessler / scattertext / scattertext / semioticsquare / SemioticSquare.py View on Github external
def __init__(self,
                 term_doc_matrix,
                 category_a,
                 category_b,
                 neutral_categories,
                 labels=None,
                 term_ranker=AbsoluteFrequencyRanker,
                 scorer=None):
        '''
        Parameters
        ----------
        term_doc_matrix : TermDocMatrix
            TermDocMatrix (or descendant) which will be used in constructing square.
        category_a : str
            Category name for term A
        category_b : str
            Category name for term B (in opposition to A)
        neutral_categories : list[str]
            List of category names that A and B will be contrasted to.  Should be in same domain.
        labels : dict
            None by default. Labels are dictionary of {'a_and_b': 'A and B', ...} to be shown
            above each category.
        term_ranker : TermRanker
github JasonKessler / scattertext / scattertext / __init__.py View on Github external
return ScatterplotStructure instead of html
    Returns
    -------
    str
    html of visualization

    '''
    color = None
    if singleScoreMode or word_vec_use_p_vals:
        color = 'd3.interpolatePurples'
    if singleScoreMode or not sort_by_dist:
        sort_by_dist = False
    else:
        sort_by_dist = True
    if term_ranker is None:
        term_ranker = termranking.AbsoluteFrequencyRanker

    category_name, not_category_name = get_category_names(category, category_name, not_categories, not_category_name)

    if not_categories is None:
        not_categories = [c for c in corpus.get_categories() if c != category]

    if term_scorer:
        scores = get_term_scorer_scores(category, corpus, neutral_categories, not_categories, show_neutral, term_ranker,
                                        term_scorer, use_non_text_features)

    if pmi_filter_thresold is not None:
        pmi_threshold_coefficient = pmi_filter_thresold
        warnings.warn(
            "The argument name 'pmi_filter_thresold' has been deprecated. Use 'pmi_threshold_coefficient' in its place",
            DeprecationWarning)
github JasonKessler / scattertext / scattertext / termsignificance / LogOddsRatioUninformativeDirichletPrior.py View on Github external
	def __init__(self, alpha_w=0.001, ranker=AbsoluteFrequencyRanker):
		'''
		Store the constant prior on the instance.

		Parameters
		----------
		alpha_w : float
			The constant prior. Default is 0.001.
		ranker : presumably a TermRanker class -- confirm
			Default is AbsoluteFrequencyRanker.
			NOTE(review): not stored or used in the lines visible here -- confirm
			whether it is meant to be kept on the instance.
		'''
		self.alpha_w = alpha_w
github JasonKessler / scattertext / scattertext / termscoring / CorpusBasedTermScorer.py View on Github external
def __init__(self, corpus, *args, **kwargs):
        '''
        Initialize the scorer's state from a corpus.

        Parameters
        ----------
        corpus : presumably a Corpus / TermDocMatrix (must expose ``_y``) -- confirm
            Stored on ``self.corpus_`` and passed to AbsoluteFrequencyRanker to
            build ``self.term_ranker_``.
        *args
            Accepted but not used in this method.
        **kwargs
            Forwarded unchanged to ``self._set_scorer_args``.
        '''
        self.corpus_ = corpus
        # Reads the corpus's private ``_y`` attribute
        # (presumably per-document category ids -- confirm).
        self.category_ids_ = corpus._y
        self.tdf_ = None
        # Runs after corpus_, category_ids_ and tdf_ are set, and before the
        # remaining defaults below are assigned.
        self._set_scorer_args(**kwargs)
        self.term_ranker_ = AbsoluteFrequencyRanker(corpus)
        self.use_metadata_ = False
        self.category_name_is_set_ = False
github JasonKessler / scattertext / scattertext / __init__.py View on Github external
def produce_scattertext_html(term_doc_matrix,
                             category,
                             category_name,
                             not_category_name,
                             protocol='https',
                             minimum_term_frequency=DEFAULT_MINIMUM_TERM_FREQUENCY,
                             pmi_threshold_coefficient=DEFAULT_PMI_THRESHOLD_COEFFICIENT,
                             max_terms=None,
                             filter_unigrams=False,
                             height_in_pixels=None,
                             width_in_pixels=None,
                             term_ranker=termranking.AbsoluteFrequencyRanker):
    '''Returns html code of visualization.

    Parameters
    ----------
    term_doc_matrix : TermDocMatrix
        Corpus to use
    category : str
        name of category column
    category_name: str
        name of category to mine for
    not_category_name: str
        name of everything that isn't in category
    protocol : str
        optional, protocol to use, http or https
    minimum_term_frequency : int, optional
        Minimum number of times word needs to appear to make it into visualization.