Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self,
             term_ranker=AbsoluteFrequencyRanker,
             background_frequencies=DefaultBackgroundFrequencies,
             rerank_ranks=False):
    """Record the configuration on the instance.

    Parameters
    ----------
    term_ranker : TermRanker
        Defaults to AbsoluteFrequencyRanker.
    background_frequencies : BackgroundFrequencies
        Defaults to DefaultBackgroundFrequencies.
    rerank_ranks : bool
        False by default. Orders scores from 0 to 1 by their dense rank.
    """
    # The arguments are stored as given; nothing is instantiated or
    # validated in this initializer.
    self.term_ranker = term_ranker
    self.background_frequencies = background_frequencies
    self.rerank_ranks = rerank_ranks
def __init__(self,
term_doc_matrix,
category_a_list,
category_b_list,
not_category_a_list,
not_category_b_list,
labels=None,
term_ranker=AbsoluteFrequencyRanker,
scorer=None):
'''
Parameters
----------
term_doc_matrix : TermDocMatrix
TermDocMatrix (or descendant) which will be used in constructing square.
category_a_list : list
Category names for term A
category_b_list : list
Category names for term B (in opposition to A)
not_category_a_list : list
List of category names that belong to not A
not_category_b_list : list
List of category names that belong to not B
labels : dict
None by default. Labels are dictionary of {'a_and_b': 'A and B', ...} to be shown
def __init__(self,
minimum_term_frequency=3,
minimum_not_category_term_frequency=0,
jitter=None,
seed=0,
pmi_threshold_coefficient=3,
max_terms=None,
filter_unigrams=False,
term_ranker=AbsoluteFrequencyRanker,
use_non_text_features=False,
term_significance=None,
terms_to_include=None,
score_transform=percentile_min):
'''
Parameters
----------
term_doc_matrix : TermDocMatrix
The term doc matrix to use for the scatter chart.
minimum_term_frequency : int, optional
Minimum times an ngram has to be seen to be included. Default is 3.
minimum_not_category_term_frequency : int, optional
If an n-gram does not occur in the category, minimum times it
must be seen to be included. Default is 0.
jitter : float, optional
def __init__(self,
             term_ranker=AbsoluteFrequencyRanker,
             minimum_term_count=0,
             slack=1):
    """Record the term ranker and the minimum term count on the instance.

    Parameters
    ----------
    term_ranker : TermRanker
        Defaults to AbsoluteFrequencyRanker.
    minimum_term_count : int
        Defaults to 0.
    slack : int
        Defaults to 1.
    """
    self.term_ranker = term_ranker
    self.minimum_term_count = minimum_term_count
    # NOTE(review): `slack` is accepted and documented but is not stored in
    # the lines visible here -- confirm whether this excerpt is truncated.
def __init__(self,
term_doc_matrix,
category_a,
category_b,
neutral_categories,
labels=None,
term_ranker=AbsoluteFrequencyRanker,
scorer=None):
'''
Parameters
----------
term_doc_matrix : TermDocMatrix
TermDocMatrix (or descendant) which will be used in constructing square.
category_a : str
Category name for term A
category_b : str
Category name for term B (in opposition to A)
neutral_categories : list[str]
List of category names that A and B will be contrasted to. Should be in same domain.
labels : dict
None by default. Labels are dictionary of {'a_and_b': 'A and B', ...} to be shown
above each category.
term_ranker : TermRanker
return ScatterplotStructure instead of html
Returns
-------
str
html of visualization
'''
color = None
if singleScoreMode or word_vec_use_p_vals:
color = 'd3.interpolatePurples'
if singleScoreMode or not sort_by_dist:
sort_by_dist = False
else:
sort_by_dist = True
if term_ranker is None:
term_ranker = termranking.AbsoluteFrequencyRanker
category_name, not_category_name = get_category_names(category, category_name, not_categories, not_category_name)
if not_categories is None:
not_categories = [c for c in corpus.get_categories() if c != category]
if term_scorer:
scores = get_term_scorer_scores(category, corpus, neutral_categories, not_categories, show_neutral, term_ranker,
term_scorer, use_non_text_features)
if pmi_filter_thresold is not None:
pmi_threshold_coefficient = pmi_filter_thresold
warnings.warn(
"The argument name 'pmi_filter_thresold' has been deprecated. Use 'pmi_threshold_coefficient' in its place",
DeprecationWarning)
def __init__(self,
             alpha_w=0.001,
             ranker=AbsoluteFrequencyRanker):
    """Record the constant prior on the instance.

    Parameters
    ----------
    alpha_w : np.float
        The constant prior. Defaults to 0.001.
    ranker : presumably a TermRanker class -- TODO confirm
        Defaults to AbsoluteFrequencyRanker. Not used in the lines
        visible here.
    """
    self.alpha_w = alpha_w
def __init__(self, corpus, *args, **kwargs):
    """Bind the instance to ``corpus`` and set its initial attributes.

    Parameters
    ----------
    corpus
        Object whose ``_y`` attribute is stored as ``category_ids_`` and
        which is handed to ``AbsoluteFrequencyRanker``.
    *args
        Accepted but not used by this initializer.
    **kwargs
        Forwarded unchanged to ``self._set_scorer_args``.
    """
    self.corpus_ = corpus
    self.category_ids_ = corpus._y
    # Starts unset.
    self.tdf_ = None

    # Scorer arguments are applied before the default ranker is built,
    # matching the original statement order.
    self._set_scorer_args(**kwargs)
    self.term_ranker_ = AbsoluteFrequencyRanker(corpus)

    # Both flags start out False.
    self.use_metadata_ = self.category_name_is_set_ = False
def produce_scattertext_html(term_doc_matrix,
category,
category_name,
not_category_name,
protocol='https',
minimum_term_frequency=DEFAULT_MINIMUM_TERM_FREQUENCY,
pmi_threshold_coefficient=DEFAULT_PMI_THRESHOLD_COEFFICIENT,
max_terms=None,
filter_unigrams=False,
height_in_pixels=None,
width_in_pixels=None,
term_ranker=termranking.AbsoluteFrequencyRanker):
'''Returns html code of visualization.
Parameters
----------
term_doc_matrix : TermDocMatrix
Corpus to use
category : str
name of category column
category_name: str
name of category to mine for
not_category_name: str
name of everything that isn't in category
protocol : str
optional, protocol to use, http or https
minimum_term_frequency : int, optional
Minimum number of times word needs to appear to make it into visualization.