How to use the revscoring.datasources.meta.frequencies.delta function in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / revscoring / languages / features / regex_matches / datasources.py View on Github external
segments_removed = wikitext_diff.segments_removed

        # Regex extraction over the `segments_added` datasource, using the
        # regexes, exclusions and wrapping supplied by the enclosing scope.
        self.matches_added = extractors.regex(
            regexes, segments_added,
            name=name + ".matches_added",
            exclusions=exclusions,
            wrapping=wrapping
        )
        # Same extraction settings, applied to `segments_removed`.
        self.matches_removed = extractors.regex(
            regexes, segments_removed,
            name=name + ".matches_removed",
            exclusions=exclusions,
            wrapping=wrapping
        )

        # frequencies.delta receives the parent revision's match_frequency
        # first and this revision's match_frequency second.
        # NOTE(review): presumably the result is the per-match change from
        # parent to current -- confirm against frequencies.delta.
        self.match_delta = frequencies.delta(
            revision.parent.match_frequency,
            revision.match_frequency,
            name=name + ".match_delta"
        )
        # Reads self.match_delta assigned just above, so this statement has to
        # stay after it.
        self.match_prop_delta = frequencies.prop_delta(
            revision.parent.match_frequency,
            self.match_delta,
            name=name + ".match_prop_delta"
        )
github wikimedia / revscoring / revscoring / features / wikitext / tokenized / diff / datasources.py View on Github external
name=prefix + ".number_delta"
)
"""
A number frequency delta table
"""

# Takes the parent revision's number_frequency datasource and the
# `number_delta` datasource, which must already be bound at this point.
number_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.number_frequency,
    number_delta,
    name=prefix + ".number_prop_delta"
)
"""
A number proportional frequency delta table
"""

# frequencies.delta receives the parent revision's whitespace_frequency first
# and this revision's whitespace_frequency second.
# NOTE(review): presumably the result is the per-item change from parent to
# current -- confirm against frequencies.delta.
whitespace_delta = frequencies.delta(
    revision.parent.datasources.whitespace_frequency,
    revision.datasources.whitespace_frequency,
    name=prefix + ".whitespace_delta"
)
"""
A whitespace frequency delta table
"""

# Consumes whitespace_delta from just above, so it must be defined after it.
whitespace_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.whitespace_frequency,
    whitespace_delta,
    name=prefix + ".whitespace_prop_delta"
)
"""
A whitespace proportional frequency delta table
"""
github wikimedia / revscoring / revscoring / features / wikitext / datasources / tokenized.py View on Github external
name=self._name + ".whitespace_delta"
        )
        """
        A whitespace frequency delta table
        """

        # Reads self.whitespace_delta, which has to be set before this line
        # runs; the other input is the parent revision's whitespace_frequency.
        self.whitespace_prop_delta = frequencies.prop_delta(
            self.revision.parent.whitespace_frequency,
            self.whitespace_delta,
            name=self._name + ".whitespace_prop_delta"
        )
        """
        A whitespace proportional frequency delta table
        """

        # frequencies.delta receives the parent revision's markup_frequency
        # first and this revision's markup_frequency second.
        self.markup_delta = frequencies.delta(
            self.revision.parent.markup_frequency,
            self.revision.markup_frequency,
            name=self._name + ".markup_delta"
        )
        """
        A markup frequency delta table
        """

        # Consumes self.markup_delta from just above, so order matters here.
        self.markup_prop_delta = frequencies.prop_delta(
            self.revision.parent.markup_frequency,
            self.markup_delta,
            name=self._name + ".markup_prop_delta"
        )
        """
        A markup proportional frequency delta table
        """
github wikimedia / revscoring / revscoring / features / wikitext / datasources / tokenized.py View on Github external
def __init__(self, *args, **kwargs):
        """
        Forward all arguments to the parent initializer, then attach
        frequency delta datasources to this instance.
        """
        # The parent initializer runs first; the statements below read
        # self.revision and self._name, so they rely on those being set by
        # then (presumably by the parent initializer -- TODO confirm).
        super().__init__(*args, **kwargs)

        # frequencies.delta receives the parent revision's token_frequency
        # first and this revision's token_frequency second.
        self.token_delta = frequencies.delta(
            self.revision.parent.token_frequency,
            self.revision.token_frequency,
            name=self._name + ".token_delta"
        )
        """
        A token frequency delta table
        """

        # Consumes self.token_delta from just above, so order matters here.
        self.token_prop_delta = frequencies.prop_delta(
            self.revision.parent.token_frequency,
            self.token_delta,
            name=self._name + ".token_prop_delta"
        )
        """
        A token proportional frequency delta table
        """
github wikimedia / revscoring / revscoring / features / wikitext / tokenized / diff / datasources.py View on Github external
name=prefix + ".punctuation_delta"
)
"""
A punctuation frequency delta table
"""

# Takes the parent revision's punctuation_frequency datasource and the
# `punctuation_delta` datasource, which must already be bound at this point.
punctuation_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.punctuation_frequency,
    punctuation_delta,
    name=prefix + ".punctuation_prop_delta"
)
"""
A punctuation proportional frequency delta table
"""

# frequencies.delta receives the parent revision's break_frequency first and
# this revision's break_frequency second.
break_delta = frequencies.delta(
    revision.parent.datasources.break_frequency,
    revision.datasources.break_frequency,
    name=prefix + ".break_delta"
)
"""
A break frequency delta table
"""

# Consumes break_delta from just above, so it must be defined after it.
break_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.break_frequency,
    break_delta,
    name=prefix + ".break_prop_delta"
)
"""
A break proportional frequency delta table
github wikimedia / revscoring / revscoring / languages / features / dictionary / datasources.py View on Github external
)
        # wikitext_diff.words_removed is mapped through str and then filtered
        # with dictionary_check.
        self.dict_words_removed = filters.filter(
            dictionary_check, mappers.map(str, wikitext_diff.words_removed),
            name=name + ".dict_words_removed"
        )
        # Same filter over words_added, but with inverse=True.
        # NOTE(review): presumably inverse=True keeps the words that
        # dictionary_check rejects -- confirm against filters.filter.
        self.non_dict_words_added = filters.filter(
            dictionary_check, mappers.map(str, wikitext_diff.words_added),
            name=name + ".non_dict_words_added", inverse=True
        )
        # Inverse filter again, this time over words_removed.
        self.non_dict_words_removed = filters.filter(
            dictionary_check, mappers.map(str, wikitext_diff.words_removed),
            name=name + ".non_dict_words_removed", inverse=True
        )

        # Frequencies
        # Each delta receives the parent revision's frequency datasource
        # first and this revision's second.
        self.dict_word_delta = frequencies.delta(
            revision.parent.dict_word_frequency,
            revision.dict_word_frequency,
            name=name + ".dict_word_delta"
        )
        self.non_dict_word_delta = frequencies.delta(
            revision.parent.non_dict_word_frequency,
            revision.non_dict_word_frequency,
            name=name + ".non_dict_word_delta"
        )

        # Consumes self.dict_word_delta assigned above, so order matters here.
        self.dict_word_prop_delta = frequencies.prop_delta(
            revision.parent.dict_word_frequency, self.dict_word_delta,
            name=name + ".dict_word_prop_delta"
        )
        self.non_dict_word_prop_delta = frequencies.prop_delta(
            revision.parent.non_dict_word_frequency,
github wikimedia / revscoring / revscoring / features / wikitext / tokenized / diff / datasources.py View on Github external
name=prefix + ".word_delta"
)
"""
A lower-cased word frequency delta table
"""

# Takes the parent revision's word_frequency datasource and the `word_delta`
# datasource, which must already be bound at this point.
word_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.word_frequency,
    word_delta,
    name=prefix + ".word_prop_delta"
)
"""
A lower-cased word proportional frequency delta table
"""

# frequencies.delta receives the parent revision's uppercase_word_frequency
# first and this revision's uppercase_word_frequency second.
uppercase_word_delta = frequencies.delta(
    revision.parent.datasources.uppercase_word_frequency,
    revision.datasources.uppercase_word_frequency,
    name=prefix + ".uppercase_word_delta"
)
"""
An uppercase word frequency delta table
"""

# Consumes uppercase_word_delta from just above, so it must be defined after
# it.
uppercase_word_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.uppercase_word_frequency,
    uppercase_word_delta,
    name=prefix + ".uppercase_word_prop_delta"
)
"""
An uppercase word proportional frequency delta table
"""
github wikimedia / revscoring / revscoring / features / wikitext / tokenized / diff / datasources.py View on Github external
name=prefix + ".token_delta"
)
"""
A token frequency delta table
"""

# Takes the parent revision's token_frequency datasource and the `token_delta`
# datasource, which must already be bound at this point.
token_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.token_frequency,
    token_delta,
    name=prefix + ".token_prop_delta"
)
"""
A token proportional frequency delta table
"""

# frequencies.delta receives the parent revision's number_frequency first and
# this revision's number_frequency second.
number_delta = frequencies.delta(
    revision.parent.datasources.number_frequency,
    revision.datasources.number_frequency,
    name=prefix + ".number_delta"
)
"""
A number frequency delta table
"""

# Consumes number_delta from just above, so it must be defined after it.
number_prop_delta = frequencies.prop_delta(
    revision.parent.datasources.number_frequency,
    number_delta,
    name=prefix + ".number_prop_delta"
)
"""
A number proportional frequency delta table
"""
github wikimedia / revscoring / revscoring / features / wikitext / datasources / tokenized.py View on Github external
name=self._name + ".punctuation_delta"
        )
        """
        A punctuation frequency delta table
        """

        # Reads self.punctuation_delta, which has to be set before this line
        # runs; the other input is the parent revision's
        # punctuation_frequency.
        self.punctuation_prop_delta = frequencies.prop_delta(
            self.revision.parent.punctuation_frequency,
            self.punctuation_delta,
            name=self._name + ".punctuation_prop_delta"
        )
        """
        A punctuation proportional frequency delta table
        """

        # frequencies.delta receives the parent revision's break_frequency
        # first and this revision's break_frequency second.
        self.break_delta = frequencies.delta(
            self.revision.parent.break_frequency,
            self.revision.break_frequency,
            name=self._name + ".break_delta"
        )
        """
        A break frequency delta table
        """

        # Consumes self.break_delta from just above, so order matters here.
        self.break_prop_delta = frequencies.prop_delta(
            self.revision.parent.break_frequency,
            self.break_delta,
            name=self._name + ".break_prop_delta"
        )
        """
        A break proportional frequency delta table
github wikimedia / revscoring / revscoring / features / wikitext / datasources / tokenized.py View on Github external
name=self._name + ".word_delta"
        )
        """
        A lower-cased word frequency delta table
        """

        # Reads self.word_delta, which has to be set before this line runs;
        # the other input is the parent revision's word_frequency.
        self.word_prop_delta = frequencies.prop_delta(
            self.revision.parent.word_frequency,
            self.word_delta,
            name=self._name + ".word_prop_delta"
        )
        """
        A lower-cased word proportional frequency delta table
        """

        # frequencies.delta receives the parent revision's
        # uppercase_word_frequency first and this revision's second.
        self.uppercase_word_delta = frequencies.delta(
            self.revision.parent.uppercase_word_frequency,
            self.revision.uppercase_word_frequency,
            name=self._name + ".uppercase_word_delta"
        )
        """
        An uppercase word frequency delta table
        """

        # Consumes self.uppercase_word_delta from just above, so order
        # matters here.
        self.uppercase_word_prop_delta = frequencies.prop_delta(
            self.revision.parent.uppercase_word_frequency,
            self.uppercase_word_delta,
            name=self._name + ".uppercase_word_prop_delta"
        )
        """
        An uppercase word proportional frequency delta table
        """