Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self.words_removed = self.tokens_removed_in_types(
{'word'}, name=self._name + ".words_removed"
)
"""
A list of word tokens removed in the edit
"""
self.uppercase_words_added = filters.filter(
is_uppercase_word, self.words_added,
name=self._name + ".uppercase_words_added"
)
"""
A list of fully UPPERCASE word tokens added in the edit
"""
self.uppercase_words_removed = filters.filter(
is_uppercase_word, self.words_removed,
name=self._name + ".uppercase_words_removed"
)
"""
A list of fully UPPERCASE word tokens removed in the edit
"""
self.punctuations_added = self.tokens_added_in_types(
{'period', 'qmark', 'epoint', 'comma', 'colon', 'scolon',
'japan_punct'},
name=self._name + ".punctuations_added"
)
"""
A list of punctuation tokens added in the edit
"""
self.dictionary_check = dictionary_check
# Based on edit.diff
self.dict_words_added = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_added),
name=name + ".dict_words_added"
)
self.dict_words_removed = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_removed),
name=name + ".dict_words_removed"
)
self.non_dict_words_added = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_added),
name=name + ".non_dict_words_added", inverse=True
)
self.non_dict_words_removed = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_removed),
name=name + ".non_dict_words_removed", inverse=True
)
# Frequencies
self.dict_word_delta = frequencies.delta(
revision.parent.dict_word_frequency,
revision.dict_word_frequency,
name=name + ".dict_word_delta"
)
self.non_dict_word_delta = frequencies.delta(
revision.parent.non_dict_word_frequency,
revision.non_dict_word_frequency,
name=name + ".non_dict_word_delta"
)
self.is_stopword = is_stopword
# Based on edit.diff
self.stopwords_added = filters.filter(
is_stopword, wikitext_diff.words_added,
name=name + ".diff.stopwords_added"
)
self.stopwords_removed = filters.filter(
is_stopword, wikitext_diff.words_removed,
name=name + ".diff.stopwords_removed"
)
self.non_stopwords_added = filters.filter(
is_stopword, wikitext_diff.words_added,
name=name + ".diff.non_stopwords_added", inverse=True
)
self.non_stopwords_removed = filters.filter(
is_stopword, wikitext_diff.words_removed,
name=name + ".diff.non_stopwords_removed", inverse=True
)
# Frequencies
self.stopword_delta = frequencies.delta(
revision.parent.stopword_frequency,
revision.stopword_frequency,
name=name + ".diff.stopword_delta"
)
self.non_stopword_delta = frequencies.delta(
revision.parent.non_stopword_frequency,
revision.non_stopword_frequency,
name=name + ".diff.non_stopword_delta"
)
def tokens_removed_in_types(self, types, name=None):
    """
    Constructs a :class:`revscoring.Datasource` that represents tokens
    removed that are within a set of types.
    """
    # Normalize to a set so membership tests and the generated default
    # name are consistent regardless of the input container type.
    type_set = set(types)
    if name is None:
        name = "{0}({1})".format(
            self._name + ".tokens_removed_in_types", type_set)
    return filters.filter(
        TokenIsInTypes(type_set).filter, self.tokens_removed, name=name)
def tokens_added_in_types(self, types, name=None):
    """
    Constructs a :class:`revscoring.Datasource` that represents tokens
    added that are within a set of types.
    """
    # Normalize to a set so membership tests and the generated default
    # name are consistent regardless of the input container type.
    type_set = set(types)
    if name is None:
        name = "{0}({1})".format(
            self._name + ".tokens_added_in_types", type_set)
    return filters.filter(
        TokenIsInTypes(type_set).filter, self.tokens_added, name=name)
:Parameters:
numbers_datasource : :class:`revscoring.Datasource`
A datasource that generates the subset of numbers that are positive
name : `str`
A name for the datasource.
"""
def __init__(self, numbers_datasource, name=None):
    # Derive a readable default name from the wrapped datasource when
    # the caller does not supply one, then delegate to the base filter
    # with the positivity predicate.
    resolved_name = self._format_name(name, [numbers_datasource])
    super().__init__(self.is_positive, numbers_datasource,
                     name=resolved_name)
def is_positive(self, v):
    """Return True iff *v* is strictly greater than zero."""
    # Zero is deliberately excluded: only strict positives pass.
    return v > 0
class negative(filter):
    """
    Generates a filtered list of negative numbers from a list of numbers.

    :Parameters:
        numbers_datasource : :class:`revscoring.Datasource`
            A datasource that generates the subset of numbers that are
            negative
        name : `str`
            A name for the datasource.
    """

    def __init__(self, numbers_datasource, name=None):
        # Build a default name from the wrapped datasource when none is
        # given, then delegate to the base filter with the negativity
        # predicate.
        resolved_name = self._format_name(name, [numbers_datasource])
        super().__init__(self.is_negative, numbers_datasource,
                         name=resolved_name)

    def is_negative(self, v):
        """Return True iff *v* is strictly less than zero."""
        # Zero is deliberately excluded: only strict negatives pass.
        return v < 0
def __init__(self, name, dictionary_check, wikitext_diff, revision):
super().__init__(name)
self.dictionary_check = dictionary_check
# Based on edit.diff
self.dict_words_added = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_added),
name=name + ".dict_words_added"
)
self.dict_words_removed = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_removed),
name=name + ".dict_words_removed"
)
self.non_dict_words_added = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_added),
name=name + ".non_dict_words_added", inverse=True
)
self.non_dict_words_removed = filters.filter(
dictionary_check, mappers.map(str, wikitext_diff.words_removed),
name=name + ".non_dict_words_removed", inverse=True
)
# Frequencies
self.dict_word_delta = frequencies.delta(
revision.parent.dict_word_frequency,
revision.dict_word_frequency,
def __init__(self, name, is_stopword, wikitext_diff, revision):
super().__init__(name)
self.is_stopword = is_stopword
# Based on edit.diff
self.stopwords_added = filters.filter(
is_stopword, wikitext_diff.words_added,
name=name + ".diff.stopwords_added"
)
self.stopwords_removed = filters.filter(
is_stopword, wikitext_diff.words_removed,
name=name + ".diff.stopwords_removed"
)
self.non_stopwords_added = filters.filter(
is_stopword, wikitext_diff.words_added,
name=name + ".diff.non_stopwords_added", inverse=True
)
self.non_stopwords_removed = filters.filter(
is_stopword, wikitext_diff.words_removed,
name=name + ".diff.non_stopwords_removed", inverse=True
)
# Frequencies
self.stopword_delta = frequencies.delta(
revision.parent.stopword_frequency,
revision.stopword_frequency,
images_in_templates + (side_by_side_image_templates * 2) + \
images_in_tags + infobox_images
# References
def filter_paragraphs_without_ref_tags(segment):
    """Return True when *segment* has more than 10 words and no refs."""
    word_count = 0
    ref_count = 0
    for token in segment.tokens():
        if token.type == "word":
            word_count += 1
        if token.type in ("ref_open", "ref_close", "ref_singleton"):
            ref_count += 1
    # A paragraph only counts as "unreferenced" when it contains a
    # substantial amount of prose (strictly more than 10 word tokens)
    # and not a single <ref> marker.
    return word_count > 10 and ref_count == 0
# Datasource yielding only the paragraph segments that contain a
# substantial amount of prose (more than 10 word tokens) but no <ref>
# tags at all.
paragraphs_without_refs = filters.filter(
    filter_paragraphs_without_ref_tags,
    wikitext.revision.datasources.paragraphs_sentences_and_whitespace,
    name="ptwiki.revision.paragraphs_without_refs"
)
# Total character length (after str() conversion) of all unreferenced
# paragraphs — a proxy for how much content lacks citations.
paragraphs_without_refs_total_length = aggregators.sum(
    mappers.map(len, mappers.map(str, paragraphs_without_refs)),
    name="ptwiki.revision.paragraphs_without_refs_total_length"
)
local_wiki = [
all_images,
all_images / max(wikitext.revision.content_chars, 1),
category_links,
category_links / max(wikitext.revision.content_chars, 1),
all_ref_tags,