How to use the revscoring.features.meta.aggregators.len function in revscoring

To help you get started, we’ve selected a few examples of `revscoring.features.meta.aggregators.len`, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: wikimedia/revscoring — revscoring/features/wikibase/diff/diff.py (view on GitHub)
import re

from . import datasources
from ...feature import Feature
from ...meta import aggregators

# Aliases so the item datasources are reachable directly from this module.
revision_item = datasources.revision_item
parent_item = datasources.parent_item

# Every name below is `aggregators.len` applied to one diff datasource, i.e.
# a feature holding the number of entries that datasource yields.

# Sitelinks: number added / removed / changed
sitelinks_added = aggregators.len(datasources.sitelinks_added)
sitelinks_removed = aggregators.len(datasources.sitelinks_removed)
sitelinks_changed = aggregators.len(datasources.sitelinks_changed)

# Labels: number added / removed / changed
labels_added = aggregators.len(datasources.labels_added)
labels_removed = aggregators.len(datasources.labels_removed)
labels_changed = aggregators.len(datasources.labels_changed)

# Aliases: number added / removed / changed
aliases_added = aggregators.len(datasources.aliases_added)
aliases_removed = aggregators.len(datasources.aliases_removed)
aliases_changed = aggregators.len(datasources.aliases_changed)

# Descriptions: number added / removed / changed
descriptions_added = aggregators.len(datasources.descriptions_added)
descriptions_removed = aggregators.len(datasources.descriptions_removed)
descriptions_changed = aggregators.len(datasources.descriptions_changed)

# Claims: number added / removed
claims_added = aggregators.len(datasources.claims_added)
claims_removed = aggregators.len(datasources.claims_removed)
Source: wikimedia/revscoring — revscoring/features/wikitext/edit/diff/tokens.py (view on GitHub)
datasources.numbers_added,
    name=prefix + ".numbers_added"
)
"""
A count of the number tokens added in this edit.
"""

# Each feature below is `aggregators.len` over a token datasource and is given
# an explicit name built from `prefix` plus a dotted suffix.
# NOTE(review): `prefix` and `datasources` are defined outside this excerpt.
# The bare string after each assignment documents the feature above it.
numbers_removed = aggregators.len(
    datasources.numbers_removed,
    name=prefix + ".numbers_removed"
)
"""
A count of the number tokens removed in this edit.
"""

markups_added = aggregators.len(
    datasources.markups_added,
    name=prefix + ".markups_added"
)
"""
A count of the markup tokens added in this edit.
"""

markups_removed = aggregators.len(
    datasources.markups_removed,
    name=prefix + ".markups_removed"
)
"""
A count of the markup tokens removed in this edit.
"""

whitespaces_added = aggregators.len(
Source: wikimedia/revscoring — revscoring/features/wikibase/features/diff.py (view on GitHub)
self.properties_removed = \
            aggregators.len(self.datasources.properties_removed)
        "`int` : The number of properties removed"
        self.properties_changed = \
            aggregators.len(self.datasources.properties_changed)
        "`int` : The number of properties changed"

        # Claims
        self.statements_added = \
            aggregators.len(self.datasources.statements_added)
        "`int` : The number of statements/claims added"
        self.claims_added = \
            aggregators.len(self.datasources.claims_added)  # Backwards compatible
        "`int` : The number of statements/claims added"
        self.statements_removed = \
            aggregators.len(self.datasources.statements_removed)
        "`int` : The number of statements/claims removed"
        self.claims_removed = \
            aggregators.len(self.datasources.claims_removed)  # Backwards compatible
        "`int` : The number of statements/claims removed"
        self.statements_changed = \
            aggregators.len(self.datasources.statements_changed)
        "`int` : The number of statements/claims changed"
        self.claims_changed = \
            aggregators.len(self.datasources.claims_changed)  # Backwards compatible
        "`int` : The number of statements/claims changed"

        # Sources
        self.sources_added = aggregators.len(self.datasources.sources_added)
        "`int` : The number of sources added"
        self.sources_removed = \
            aggregators.len(self.datasources.sources_removed)
Source: wikimedia/revscoring — revscoring/features/wikibase/features/revision_oriented.py (view on GitHub)
def __init__(self, name, revision_datasources):
        super().__init__(name)

        self.datasources = revision_datasources

        self.sitelinks = aggregators.len(self.datasources.sitelinks)
        "`int` : A count of sitelinks in the revision"
        self.labels = aggregators.len(self.datasources.labels)
        "`int` : A count of labels in the revision"
        self.aliases = aggregators.len(self.datasources.aliases)
        "`int` : A count of aliases in the revision"
        self.descriptions = aggregators.len(self.datasources.descriptions)
        "`int` : A count of descriptions in the revision"
        self.properties = aggregators.len(self.datasources.properties)
        "`int` : A count of properties in the revision"
        self.claims = aggregators.len(self.datasources.claims)
        "`int` : A count of claims in the revision"
        self.sources = aggregators.len(self.datasources.sources)
        "`int` : A count of sources in the revision"
        self.reference_claims = aggregators.len(
                self.datasources.reference_claims)
        "`int` : A count of reference claims in the revision"
        self.qualifiers = aggregators.len(self.datasources.qualifiers)
        "`int` : A count of qualifiers in the revision"
        self.badges = aggregators.len(self.datasources.badges)
        "`int` : A count of badges in the revision"

        if hasattr(self.datasources, "parent"):
Source: wikimedia/revscoring — revscoring/features/wikibase/diff/diff.py (view on GitHub)
# Every name below is `aggregators.len` applied to one diff datasource, i.e.
# a feature holding the number of entries that datasource yields.

# Aliases: number added / removed / changed
aliases_added = aggregators.len(datasources.aliases_added)
aliases_removed = aggregators.len(datasources.aliases_removed)
aliases_changed = aggregators.len(datasources.aliases_changed)

# Descriptions: number added / removed / changed
descriptions_added = aggregators.len(datasources.descriptions_added)
descriptions_removed = aggregators.len(datasources.descriptions_removed)
descriptions_changed = aggregators.len(datasources.descriptions_changed)

# Claims: number added / removed / changed
claims_added = aggregators.len(datasources.claims_added)
claims_removed = aggregators.len(datasources.claims_removed)
claims_changed = aggregators.len(datasources.claims_changed)

# Sources: number added / removed (no "changed" datasource is used here)
sources_added = aggregators.len(datasources.sources_added)
sources_removed = aggregators.len(datasources.sources_removed)

# Qualifiers: number added / removed (no "changed" datasource is used here)
qualifiers_added = aggregators.len(datasources.qualifiers_added)
qualifiers_removed = aggregators.len(datasources.qualifiers_removed)

# Badges: number added / removed / changed
badges_added = aggregators.len(datasources.badges_added)
badges_removed = aggregators.len(datasources.badges_removed)
badges_changed = aggregators.len(datasources.badges_changed)


class property_changed(Feature):
    """
    Returns True if a property has changed.
Source: wikimedia/revscoring — revscoring/features/wikitext/features/tokenized.py (view on GitHub)
def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.tokens = aggregators.len(self.datasources.tokens)
        "`int` : The number of tokens in the revision"
        self.numbers = aggregators.len(self.datasources.numbers)
        "`int` : The number of number tokens in the revision"
        self.whitespaces = aggregators.len(self.datasources.whitespaces)
        "`int` : The number of whitespace tokens in the revision"
        self.markups = aggregators.len(self.datasources.markups)
        "`int` : The number of markup tokens in the revision"
        self.cjks = aggregators.len(self.datasources.cjks)
        "`int` : The number of Chinese/Japanese/Korean tokens in the revision"
        self.entities = aggregators.len(self.datasources.entities)
        "`int` : The number of HTML entity tokens in the revision"
        self.urls = aggregators.len(self.datasources.urls)
        "`int` : The number of URL tokens in the revision"
        self.words = aggregators.len(self.datasources.words)
        "`int` : The number of word tokens in the revision"
        self.uppercase_words = \
            aggregators.len(self.datasources.uppercase_words)
        "`int` : The number of UPPERCASE word tokens in the revision"
        self.punctuations = aggregators.len(self.datasources.punctuations)
        "`int` : The number of punctuation tokens in the revision"
        self.breaks = aggregators.len(self.datasources.breaks)
        "`int` : The number of break tokens in the revision"
        self.longest_token = aggregators.max(
            mappers.map(len, self.datasources.tokens), returns=int)
        "`int` : The longest single token in the revision"
        self.longest_word = aggregators.max(
Source: wikimedia/revscoring — revscoring/features/wikibase/diff/diff.py (view on GitHub)
# Aliases so the item datasources are reachable directly from this module.
revision_item = datasources.revision_item
parent_item = datasources.parent_item

# Every name below is `aggregators.len` applied to one diff datasource, i.e.
# a feature holding the number of entries that datasource yields.

# Sitelinks: number added / removed / changed
sitelinks_added = aggregators.len(datasources.sitelinks_added)
sitelinks_removed = aggregators.len(datasources.sitelinks_removed)
sitelinks_changed = aggregators.len(datasources.sitelinks_changed)

# Labels: number added / removed / changed
labels_added = aggregators.len(datasources.labels_added)
labels_removed = aggregators.len(datasources.labels_removed)
labels_changed = aggregators.len(datasources.labels_changed)

# Aliases: number added / removed / changed
aliases_added = aggregators.len(datasources.aliases_added)
aliases_removed = aggregators.len(datasources.aliases_removed)
aliases_changed = aggregators.len(datasources.aliases_changed)

# Descriptions: number added / removed / changed
descriptions_added = aggregators.len(datasources.descriptions_added)
descriptions_removed = aggregators.len(datasources.descriptions_removed)
descriptions_changed = aggregators.len(datasources.descriptions_changed)

# Claims: number added / removed / changed
claims_added = aggregators.len(datasources.claims_added)
claims_removed = aggregators.len(datasources.claims_removed)
claims_changed = aggregators.len(datasources.claims_changed)

# Sources: number added / removed (no "changed" datasource is used here)
sources_added = aggregators.len(datasources.sources_added)
sources_removed = aggregators.len(datasources.sources_removed)
Source: wikimedia/revscoring — revscoring/features/wikitext/edit/diff/tokens.py (view on GitHub)
datasources.whitespaces_removed,
    name=prefix + ".whitespaces_removed"
)
"""
A count of the whitespace tokens removed in this edit.
"""

# Each feature below is `aggregators.len` over a token datasource and is given
# an explicit name built from `prefix` plus a dotted suffix.
# NOTE(review): `prefix` and `datasources` are defined outside this excerpt.
# The bare string after each assignment documents the feature above it.
cjks_added = aggregators.len(
    datasources.cjks_added,
    name=prefix + ".cjks_added"
)
"""
A count of the cjk tokens added in this edit.
"""

cjks_removed = aggregators.len(
    datasources.cjks_removed,
    name=prefix + ".cjks_removed"
)
"""
A count of the cjk tokens removed in this edit.
"""

entities_added = aggregators.len(
    datasources.entities_added,
    name=prefix + ".entities_added"
)
"""
A count of the entity tokens added in this edit.
"""

entities_removed = aggregators.len(
Source: wikimedia/revscoring — revscoring/features/wikibase/features/diff.py (view on GitHub)
super().__init__(name)
        self.datasources = datasources

        # Sitelinks
        self.sitelinks_added = \
            aggregators.len(self.datasources.sitelinks_added)
        "`int` : The number of sitelinks added"
        self.sitelinks_removed = \
            aggregators.len(self.datasources.sitelinks_removed)
        "`int` : The number of sitelinks removed"
        self.sitelinks_changed = \
            aggregators.len(self.datasources.sitelinks_changed)
        "`int` : The number of sitelinks changed"

        # Labels
        self.labels_added = aggregators.len(self.datasources.labels_added)
        "`int` : The number of labels added"
        self.labels_removed = aggregators.len(self.datasources.labels_removed)
        "`int` : The number of labels removed"
        self.labels_changed = aggregators.len(self.datasources.labels_changed)
        "`int` : The number of labels changed"

        # Aliases
        self.aliases_added = aggregators.len(self.datasources.aliases_added)
        "`int` : The number of aliases added"
        self.aliases_removed = \
            aggregators.len(self.datasources.aliases_removed)
        "`int` : The number of aliases removed"
        self.aliases_changed = \
            aggregators.len(self.datasources.aliases_changed)
        "`int` : The number of aliases changed"
Source: wikimedia/revscoring — revscoring/features/wikitext/features/edit_tokens.py (view on GitHub)
)
        "`int` : The number of segments removed "

        self.tokens_added = aggregators.len(
            self.datasources.tokens_added,
            name=self._name + ".tokens_added"
        )
        "`int` : The number of tokens added "

        self.tokens_removed = aggregators.len(
            self.datasources.tokens_removed,
            name=self._name + ".tokens_removed"
        )
        "`int` : The number of tokens removed "

        self.numbers_added = aggregators.len(
            self.datasources.numbers_added,
            name=self._name + ".numbers_added"
        )
        "`int` : The number of number tokens added "

        self.numbers_removed = aggregators.len(
            self.datasources.numbers_removed,
            name=self._name + ".numbers_removed"
        )
        "`int` : The number of number tokens removed "

        self.markups_added = aggregators.len(
            self.datasources.markups_added,
            name=self._name + ".markups_added"
        )
        "`int` : The number of markup tokens added "