How to use the revscoring.features.feature.Feature class in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / tests / features / test_functions.py View on Github external
def test_trim():
    """Exercise `trim` on a single feature, on lists, and on composites."""
    source = Datasource("derp1")
    first = Feature("foobar1", returns=int)
    second = Feature("foobar2", returns=int, depends_on=[source])
    five = Constant(value=5)
    vector = FeatureVector("foobar3", returns=int, depends_on=[five])

    # A lone feature comes back as itself.
    trimmed_single = list(trim(first))
    assert trimmed_single == [first]

    # A list of distinct base features comes back unchanged, in order.
    trimmed_list = list(trim([first, second, vector]))
    assert trimmed_list == [first, second, vector]

    # Adding the composite `first + second` contributes nothing new to the
    # expected output.
    trimmed_with_sum = list(trim([first, second, first + second, vector]))
    assert trimmed_with_sum == [first, second, vector]

    # A nested expression is expected to reduce to the two features it is
    # built from.
    trimmed_nested = list(trim(log(max(first - second, 1))))
    assert trimmed_nested == [first, second]
github wikimedia / revscoring / revscoring / features / page.py View on Github external
from ..datasources import page_creation, revision, site
from .feature import Feature


def process_is_content_namespace(revision_metadata, namespace_map):
    """
    Look up the revision's page namespace and report its `content` flag.

    :Parameters:
        revision_metadata
            Object exposing a `page_namespace` attribute, used as the key
            into `namespace_map`
        namespace_map
            Mapping from namespace key to an object exposing a `content`
            attribute

    :Returns:
        The `content` attribute of the matching namespace entry.
    """
    namespace = namespace_map[revision_metadata.page_namespace]
    return namespace.content

# Boolean feature computed by `process_is_content_namespace` from the
# revision metadata and the site's namespace map.
is_content_namespace = Feature(
    "page.is_content_namespace", process_is_content_namespace,
    returns=bool,
    depends_on=[revision.metadata, site.namespace_map],
)
"""
Represents whether this page is in a content namespace or not.

:Returns:
    bool

:Example:
    .. code-block:: python

        >>> from revscoring.features import page
        >>> list(extractor.extract(655097130, [page.is_content_namespace]))
        [True]
"""
github wikimedia / revscoring / revscoring / features / temporal / revision_oriented.py View on Github external
def __init__(self, name, revision_datasources):
        """
        Set up the parent with the user's last revision and attach the
        `seconds_since` feature.

        :Parameters:
            name : `str`
                Prefix used to build the name of the feature created here
            revision_datasources
                Object exposing `user.last_revision` and `timestamp`
        """
        last_revision = revision_datasources.user.last_revision
        super().__init__(name, last_revision)

        # Depends on the timestamp of the user's last revision and the
        # timestamp of the revision itself.
        self.seconds_since = Feature(
            name + ".seconds_since", _process_seconds_since,
            returns=int,
            depends_on=[last_revision.timestamp,
                        revision_datasources.timestamp])
        "`int`: The number of seconds since the user last saved an edit"
github wikimedia / revscoring / revscoring / features / segments_added.py View on Github external
from ..datasources import contiguous_segments_added
from .feature import Feature


def process(contiguous_segments_added):
    """
    Count the contiguous segments that were added.

    :Parameters:
        contiguous_segments_added
            A sized collection of added segments

    :Returns:
        `int` : The number of items in `contiguous_segments_added`
    """
    segment_count = len(contiguous_segments_added)
    return segment_count

# Integer feature: `process` applied to the `contiguous_segments_added`
# datasource.
segments_added = Feature(
    "segments_added", process,
    returns=int,
    depends_on=[contiguous_segments_added],
)
github wikimedia / revscoring / revscoring / features / temporal / revision_oriented.py View on Github external
def __init__(self, name, revision_datasources):
        super().__init__(name)
        self.datasources = revision_datasources.user

        if hasattr(self.datasources, 'info'):
            self.seconds_since_registration = Feature(
                name + ".seconds_since_registration",
                _process_seconds_since_registration,
                returns=int,
                depends_on=[revision_datasources.user.id,
                            revision_datasources.user.info.registration,
                            revision_datasources.timestamp])
            """
            `int` : The number of seconds since the user registered their
            account -- or zero in the case of anons.  If the user has a
            registration date that is *after* the revision timestamp
            (should be implossible, but happens sometimes), the user is assumed
            to be 1 year old.
            """

        if hasattr(self.datasources, 'last_revision'):
            self.last_revision = LastUserRevision(
github wikimedia / revscoring / revscoring / features / wikibase / features / diff.py View on Github external
self.badges_removed = aggregators.len(self.datasources.badges_removed)
        "`int` : The number of badges removed"
        self.badges_changed = aggregators.len(self.datasources.badges_changed)
        "`int` : The number of badges changed"

        # AF/38
        self.proportion_of_qid_added = Feature(
            name + ".proportion_of_qid_added",
            _process_proportion_of_qid_added,
            returns=float, depends_on=[self.datasources.parent_entity,
                                       self.datasources.revision_entity]
        )
        "`int` : The proportion of Q# added."

        # AF/38
        self.proportion_of_language_added = Feature(
            name + ".proportion_of_language_added",
            _process_proportion_of_language_added,
            returns=float, depends_on=[self.datasources.parent_entity,
                                       self.datasources.revision_entity]
        )
        "`int` : The proportion of language added."

        self.proportion_of_links_added = Feature(
            name + ".proportion_of_links_added",
            _process_proportion_of_links_added,
            returns=float, depends_on=[self.datasources.parent_entity,
                                       self.datasources.revision_entity]
        )
        "`int` : The proportion of links added."

        self.identifiers_changed = Feature(
github wikimedia / revscoring / revscoring / features / wikibase / features / revision_oriented.py View on Github external
The name of a property (usually preceeded by "P")
            value : `mixed`
                The value to match
            name : `str`
                A name to associate with the Feature. If not set, the
                feature's name will be
                'has_property_value(, )'
        """
        if name is None:
            name = self._name + ".has_property_value({0}, {1})" \
                .format(repr(property), repr(value))

        return HasPropertyValue(name, property, value, self.datasources.entity)


class HasPropertyValue(Feature):
    """
    A boolean feature that checks whether an item has a statement under
    `property` whose data value, rendered with `str()`, equals `value`.

    :Parameters:
        name : `str`
            A name to associate with the feature
        property
            Key looked up in the item's `properties` mapping
        value
            The value compared against each statement's stringified
            data value
        item_datasource
            The single dependency handed to the parent `Feature`
    """

    def __init__(self, name, property, value, item_datasource):
        self.property, self.value = property, value
        super().__init__(
            name, self._process,
            returns=bool, depends_on=[item_datasource])

    def _process(self, item):
        # An item with no entry for the property has no statements to check.
        candidates = item.properties.get(self.property, [])
        rendered = (str(statement.claim.datavalue)
                    for statement in candidates)
        return self.value in rendered
github wikimedia / revscoring / revscoring / features / page_age_in_seconds.py View on Github external
from ..datasources import first_revision_metadata, revision_metadata
from .feature import Feature


def process(first_revision_metadata, revision_metadata):
    """
    Compute the gap between this revision and the page's first revision.

    :Parameters:
        first_revision_metadata
            Object exposing the `timestamp` of the first revision
        revision_metadata
            Object exposing the `timestamp` of the current revision

    :Returns:
        The current revision's timestamp minus the first revision's
        timestamp.
    """
    created_at = first_revision_metadata.timestamp
    saved_at = revision_metadata.timestamp
    return saved_at - created_at

# Integer feature: `process` applied to the first-revision and
# current-revision metadata datasources.
page_age_in_seconds = Feature(
    "page_age_in_seconds", process,
    returns=int,
    depends_on=[first_revision_metadata, revision_metadata],
)
github wikimedia / revscoring / revscoring / features / is_section_comment.py View on Github external
import re

from ..datasources import revision_metadata
from .feature import Feature

# Matches a non-empty "/* ... */" marker.
SECTION_COMMENT_RE = re.compile(r"\/\*([^\*]|\*[^\/])+\*\/")


def process(revision_metadata):
    """
    Report whether the revision comment starts with a "/* ... */" marker.

    :Parameters:
        revision_metadata
            Object exposing a `comment` attribute (a string or `None`)

    :Returns:
        `bool` : True when the comment is present and the pattern matches
        at its very beginning; False otherwise, including when the comment
        is `None`.
    """
    comment = revision_metadata.comment
    if comment is None:
        return False

    # `match` anchors at the start, so a marker later in the comment does
    # not count.
    return SECTION_COMMENT_RE.match(comment) is not None

# Boolean feature: `process` applied to the revision metadata datasource.
is_section_comment = Feature(
    "is_section_comment", process,
    returns=bool,
    depends_on=[revision_metadata],
)
github wikimedia / revscoring / revscoring / features / feature.py View on Github external
def validate(self, value):
        """
        Check `value` against the type(s) stored in `self.returns`.

        :Parameters:
            value
                The value to check

        :Returns:
            `value`, unchanged, when it is an instance of `self.returns`.

        :Raises:
            ValueError
                When `value` is not an instance of `self.returns`.
        """
        if not isinstance(value, self.returns):
            raise ValueError("Expected {0}, but got {1} instead."
                             .format(self.returns, type(value)))

        return value

    @classmethod
    def or_constant(cls, val):
        """
        Coerce `val` into a feature.

        :Parameters:
            val : `mixed`
                Either a `Feature` or a plain value

        :Returns:
            `val` itself when it is already a `Feature`; otherwise a new
            `Constant` wrapping `val`.
        """
        # The check is against `Feature`, not `cls`, so the result is the
        # same whichever subclass the method is called on.
        if isinstance(val, Feature):
            return val
        return Constant(val)


class Constant(Feature):
    """
    A special sub-type of `revscoring.Feature` that returns a constant value.

    :Parameters:
        value : `mixed`
            Any type of potential feature value
        name : `str`
            A name to give the feature
    """

    def __init__(self, value, name=None):
        self.value = value
        if name is None:
            name = str(value)
        super().__init__(name, self._process,
                         returns=type(value), depends_on=[])