How to use the revscoring.features.Feature class in revscoring

To help you get started, we’ve selected a few revscoring.features.Feature examples, based on popular ways it is used in public projects.
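Before diving into the project examples, here is a minimal, self-contained sketch of the basic pattern, assuming only the public names that appear in the snippets below (Datasource, Feature and revscoring.dependencies.solve); the "text" datasource and the lambdas are purely illustrative:

from revscoring import dependencies
from revscoring.datasources import Datasource
from revscoring.features import Feature

# A Datasource produces a raw value; a Feature computes a typed value from
# the dependencies listed in depends_on.
text = Datasource("text", lambda: "some revision text")
chars = Feature("chars", lambda t: len(t), returns=int, depends_on=[text])

# solve() walks the dependency graph and yields one value per dependent.
print(list(dependencies.solve([chars])))  # [18]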


github wikimedia / ores / tests / test_scoring_context.py (View on Github)
def test_scoring_context():
    from collections import namedtuple

    from revscoring import dependencies
    from revscoring.datasources import Datasource
    from revscoring.dependencies import Dependent
    from revscoring.features import Feature

    fake_data = Datasource("fake_data", lambda: "fake")
    len_func = Dependent("len_func")
    literal_fake = Dependent("literal_fake")
    characters = Feature("characters", lambda word, len: len(word),
                         returns=int,
                         depends_on=[fake_data, len_func])
    is_fake = Feature("is_fake", lambda word, fake: word == fake,
                      returns=bool,
                      depends_on=[fake_data, literal_fake])

    FakeExtractor = namedtuple("Extractor", ['extract', 'solve', 'language'])

    def fake_extract(rev_ids, dependents, caches=None):
        caches = caches if caches is not None else {}
        for rev_id in rev_ids:
            if rev_id % 5 != 0:
                cache = caches.get(rev_id, {})
                values = dependencies.solve(dependents,
                                            context={len_func: lambda: len},
                                            cache=cache)
                values = list(values)
                caches[rev_id] = cache
                yield None, values
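The fake extractor above demonstrates the two keyword arguments of revscoring.dependencies.solve: context, which overrides how a dependent is computed, and cache, which reuses values that were already computed for a revision. A hedged sketch of the same call outside the test harness, reusing the names defined above (the expected output is inferred from the definitions, not taken from the test):

context = {len_func: lambda: len,           # len_func resolves to the builtin len
           literal_fake: lambda: "fake"}    # literal_fake resolves to the string "fake"
values = list(dependencies.solve([characters, is_fake], context=context))
# values should be [4, True]: len("fake") and "fake" == "fake"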
github wikimedia / revscoring / revscoring / languages / space_delimited / revision.py (View on Github)
    def __init__(self, language, error_if_missing=False):
        self.language = language
        self.prefix = language.__name__ + "." + self.MODULE_NAME + "."

        self.words_list = TokenFilter(
            self.prefix + "words",
            self.DATASOURCE_MODULE.tokens,
            token_is_word,
            if_none=raise_rnf if error_if_missing else None
        )
        """
        Returns a list of word tokens.
        """

        self.words = Feature(
            self.prefix + "words", len,
            returns=int,
            depends_on=[self.words_list]
        )
        """
        A count of the number of words in the revision.
        """

        self.content_words_list = TokenFilter(
            self.prefix + "content_words",
            self.DATASOURCE_MODULE.content_tokens,
            token_is_word,
            if_none=raise_rnf if error_if_missing else None
        )
        """
        Returns a list of words that appear in the (non-markup) content of the
        revision.
        """
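Worth noting in this excerpt: because a Feature's process function receives the values of its depends_on entries as positional arguments, the builtin len can serve as the entire process function for a count feature. A small sketch of the same idiom with a hypothetical token-list datasource:

from revscoring.datasources import Datasource
from revscoring.features import Feature

# Hypothetical datasource that yields a revision's tokens.
tokens = Datasource("tokens", lambda: ["foo", "bar", "baz"])

# len is the whole process function; it receives the token list directly.
token_count = Feature("token_count", len, returns=int, depends_on=[tokens])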
github wiki-ai / wb-vandalism / wb_vandalism / features / diff.py (View on Github)
number_changed_sitelinks = Feature(
    "number_changed_sitelinks", process_no_changed_sitelinks, returns=int,
    depends_on=[sitelinks_differ])


def process_no_added_labels(labels_differ):
    return len(labels_differ.added())

number_added_labels = Feature(
    "number_added_labels", process_no_added_labels, returns=int,
    depends_on=[labels_differ])


def process_no_removed_labels(labels_differ):
    return len(labels_differ.removed())

number_removed_labels = Feature(
    "number_removed_labels", process_no_removed_labels, returns=int,
    depends_on=[labels_differ])


def process_no_changed_labels(labels_differ):
    return len(labels_differ.changed())

number_changed_labels = Feature(
    "number_changed_labels", process_no_changed_labels, returns=int,
    depends_on=[labels_differ])


def process_no_added_descriptions(descriptions_differ):
    return len(descriptions_differ.added())

number_added_descriptions = Feature(
    "number_added_descriptions", process_no_added_descriptions, returns=int,
    depends_on=[descriptions_differ])
github wiki-ai / wb-vandalism / wb_vandalism / features / revision.py (View on Github)
from wb_vandalism.datasources.parsed_revision_text import item
from revscoring.features import Feature
from .feature import has_property_value
import pywikibase



def process_no_claims(item):
    no_claims = 0
    for property_name in item.claims:
        no_claims += len(item.claims[property_name])
    return no_claims

number_claims = Feature("number_claims", process_no_claims, returns=int,
                        depends_on=[item])


def process_no_aliases(item):
    no_aliases = 0
    for lang in item.aliases:
        no_aliases += len(item.aliases[lang])
    return no_aliases


number_aliases = Feature("number_aliases", process_no_aliases, returns=int,
                        depends_on=[item])


def process_no_sources(item):
    no_sources = 0
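Both counters in this excerpt walk a dict-of-lists structure on the parsed item (claims keyed by property id, aliases keyed by language code). Assuming those attributes behave like plain dicts, the same loops could be written more compactly with sum(); the helper names below are hypothetical, not part of the project:

def count_claims(item):
    # Equivalent, more compact form of the loop in process_no_claims above.
    return sum(len(claims) for claims in item.claims.values())


def count_aliases(item):
    # Equivalent to process_no_aliases above.
    return sum(len(aliases) for aliases in item.aliases.values())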
github wiki-ai / wb-vandalism / wb_vandalism / features / diff.py (View on Github)
depends_on=[badges_differ, current_item, past_item])

# There is no need for changed badges.


def process_mean_distance_desc(parent, current, differ):
    changed = differ.changed()
    if not changed:
        return 0.0
    distance = 0
    for lang in changed:
        distance += (
            1 - ratio(current.descriptions[lang], parent.descriptions[lang]))
    return distance / len(changed)

mean_distance_descriptions = Feature(
    "mean_distance_descriptions", process_mean_distance_desc, returns=float,
    depends_on=[past_item, current_item, descriptions_differ])


def process_mean_distance_labels(parent, current, differ):
    changed = differ.changed()
    if not changed:
        return 0.0
    distance = 0
    for lang in changed:
        distance += 1 - ratio(current.labels[lang], parent.labels[lang])
    return distance / len(changed)

mean_distance_labels = Feature(
    "mean_distance_labels", process_mean_distance_labels, returns=float,
    depends_on=[current_item, past_item, labels_differ])
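One detail that is easy to miss: the process function's parameters are bound to depends_on purely by position, not by name. process_mean_distance_desc(parent, current, differ) is wired to [past_item, current_item, descriptions_differ], while mean_distance_labels lists [current_item, past_item, labels_differ] in the opposite order; that only works out because the ratio() similarity used here appears to be symmetric. A small sketch, with invented datasources, showing that the order of depends_on determines which argument is which:

from revscoring.datasources import Datasource
from revscoring.features import Feature

old_text = Datasource("old_text", lambda: "abc")
new_text = Datasource("new_text", lambda: "abcd")

# parent <- old_text and current <- new_text, purely because of list order.
chars_added = Feature("chars_added",
                      lambda parent, current: len(current) - len(parent),
                      returns=int,
                      depends_on=[old_text, new_text])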
github wiki-ai / wb-vandalism / wb_vandalism / features / diff.py (View on Github)
# There is no need for changed aliases.


def process_no_added_claims(added_claims):
    return len(added_claims)

number_added_claims = Feature(
    "number_added_claims", process_no_added_claims, returns=int,
    depends_on=[added_claims])


def process_no_removed_claims(removed_claims):
    return len(removed_claims)

number_removed_claims = Feature(
    "number_removed_claims", process_no_removed_claims, returns=int,
    depends_on=[removed_claims])


def process_no_changed_claims(changed_claims):
    return len(changed_claims)

number_changed_claims = Feature(
    "number_changed_claims", process_no_changed_claims, returns=int,
    depends_on=[changed_claims])


def process_no_changed_identifiers(changed_claims):
    counter = 0
    for old, new in changed_claims:
        if isinstance(old.target, str):
github wiki-ai / wb-vandalism / wb_vandalism / features / diff.py (View on Github)
number_removed_labels = Feature(
    "number_removed_labels", process_no_removed_labels, returns=int,
    depends_on=[labels_differ])


def process_no_changed_labels(labels_differ):
    return len(labels_differ.changed())

number_changed_labels = Feature(
    "number_changed_labels", process_no_changed_labels, returns=int,
    depends_on=[labels_differ])


def process_no_added_descriptions(descriptions_differ):
    return len(descriptions_differ.added())

number_added_descriptions = Feature(
    "number_added_descriptions", process_no_added_descriptions, returns=int,
    depends_on=[descriptions_differ])


def process_no_removed_descriptions(descriptions_differ):
    return len(descriptions_differ.removed())

number_removed_descriptions = Feature(
    "number_removed_descriptions", process_no_removed_descriptions,
    returns=int, depends_on=[descriptions_differ])


def process_no_changed_descriptions(descriptions_differ):
    return len(descriptions_differ.changed())

number_changed_descriptions = Feature(
    "number_changed_descriptions", process_no_changed_descriptions,
    returns=int, depends_on=[descriptions_differ])
github wiki-ai / wb-vandalism / wb_vandalism / features / diff.py (View on Github)
P569_changed = has_property_changed('P569')

P18_changed = has_property_changed('P18')

P109_changed = has_property_changed('P109')

P373_changed = has_property_changed('P373')

P856_changed = has_property_changed('P856')


def process_no_added_sources(added_sources):
    return len(added_sources)

number_added_sources = Feature(
    "number_added_sources", process_no_added_sources, returns=int,
    depends_on=[added_sources])


def process_no_removed_sources(removed_sources):
    return len(removed_sources)

number_removed_sources = Feature(
    "number_removed_sources", process_no_removed_sources, returns=int,
    depends_on=[removed_sources])


def process_no_added_qualifiers(added_qualifiers):
    return len(added_qualifiers)

number_added_qualifiers = Feature(
    "number_added_qualifiers", process_no_added_qualifiers, returns=int,
    depends_on=[added_qualifiers])
github wikimedia / revscoring / revscoring / languages / space_delimited / revision.py (View on Github)
        """
        A count of the number of words in the revision.
        """

        self.content_words_list = TokenFilter(
            self.prefix + "content_words",
            self.DATASOURCE_MODULE.content_tokens,
            token_is_word,
            if_none=raise_rnf if error_if_missing else None
        )
        """
        Returns a list of words that appear in the (non-markup) content of the
        revision.
        """

        self.content_words = Feature(
            self.prefix + "content_words", len,
            returns=int,
            depends_on=[self.content_words_list]
        )
        """
        A count of the number of words in the (non-markup) content of the
        revision.
        """

        if language.resources.stopwords is not None and \
           language.resources.stemmer is not None:
            self.infonoise = Infonoise(
                self.prefix + "infonoise",
                language.resources.stopwords,
                language.resources.stemmer.stem,
                self.content_words_list
github wikimedia / revscoring / revscoring / __init__.py (View on Github)
import platform
import sys

from pkg_resources import VersionConflict

from .about import (__author__, __author_email__, __description__, __name__,
                    __url__, __version__)
from .datasources import Datasource
from .dependencies import Dependent, DependentSet
from .extractors import Extractor
from .features import Feature, FeatureVector
from .score_processor import ScoreProcessor
from .scoring import Model

if sys.version_info <= (3, 0):
    raise VersionConflict(
        "Revscoring requires Python '>=3' " +
        "but your Python version is " +
        platform.python_version())


__all__ = [Datasource, Dependent, DependentSet, Extractor, Feature,
           FeatureVector, Model, ScoreProcessor,
           __name__, __version__, __author__,
           __author_email__, __description__, __url__]
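Since the package's __init__.py (above) re-exports Datasource, Feature and the rest at the top level, the earlier examples can also be written against the revscoring package directly. A final hedged sketch, assuming only the names exported above:

from revscoring import Datasource, Feature, dependencies

length = Datasource("length", lambda: 42)
is_long = Feature("is_long", lambda n: n > 10, returns=bool, depends_on=[length])

print(list(dependencies.solve([is_long])))  # [True]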