How to use the revscoring.datasources.datasource.Datasource class in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wiki-ai / wb-vandalism / wb_vandalism / datasources / diff.py View on Github external
past_item])


def process_changed_claims(claims_differ, current_item, past_item):
    """
    Pair each modified claim with the past claim it replaced.

    For every property reported by ``claims_differ.changed()``, a claim of
    `current_item` is treated as changed when some claim of `past_item` has
    an equal ``snak`` but the claim itself is not among the past claims.

    :Returns:
        A `list` of ``(past_claim, current_claim)`` tuples.
    """
    pairs = []
    for prop in claims_differ.changed():
        previous = past_item.claims[prop]

        # Index the past claims by snak; a later duplicate overrides an
        # earlier one.
        by_snak = {}
        for old in previous:
            by_snak[old.snak] = old

        for candidate in current_item.claims[prop]:
            if candidate.snak not in by_snak:
                continue
            if candidate in previous:
                # Present unchanged in the past item.
                continue
            pairs.append((by_snak[candidate.snak], candidate))

    return pairs

# Datasource "diff.changed_claims": `process_changed_claims` applied to the
# claims differ, the current item and the past item.
changed_claims = Datasource(
    "diff.changed_claims",
    process_changed_claims,
    depends_on=[claims_differ, current_item, past_item],
)


def process_added_sources(changed_claims):
    """
    Collect source claims that are attached to the new version of a changed
    claim but whose hash does not occur among the old version's sources.

    :Parameters:
        changed_claims : iterable of ``(old_claim, new_claim)`` pairs
            Each claim exposes ``.sources``; every source is iterated for its
            keys (`p_number`) and indexed by them to obtain claims that carry
            a ``.hash``.

    NOTE(review): no return statement is visible in this excerpt -- confirm
    against the full file how `added_sources` is handed back.
    """
    added_sources = []
    for old_claim, new_claim in changed_claims:
        # Hashes of every source claim on the old version of the claim.
        parent_guids = []
        for source in old_claim.sources:
            for p_number in source:
                parent_guids += [claim.hash for claim in source[p_number]]
        # Keep the new version's source claims whose hash was not seen above.
        for source in new_claim.sources:
            for p_number in source:
                for claim in source[p_number]:
                    if claim.hash not in parent_guids:
                        added_sources.append(claim)
github wikimedia / revscoring / revscoring / datasources / contiguous_segments_added.py View on Github external
from collections import namedtuple

from .datasource import Datasource
from .revision_diff import revision_diff


def process(revision_diff):
    """
    Build one string per "insert" operation of a revision diff.

    `revision_diff` unpacks into ``(operations, a, b)``; for every operation
    named ``"insert"`` the items ``b[op.b1:op.b2]`` are concatenated into a
    single string.

    :Returns:
        A `list` of `str`, one entry per insert operation, in order.
    """
    operations, _, b_items = revision_diff

    segments = []
    for operation in operations:
        if operation.name != "insert":
            continue
        segments.append("".join(b_items[operation.b1:operation.b2]))
    return segments

# Datasource "contiguous_segments_added": `process` applied to `revision_diff`.
contiguous_segments_added = Datasource(
    "contiguous_segments_added",
    process,
    depends_on=[revision_diff],
)
github wikimedia / revscoring / revscoring / datasources / revision_oriented.py View on Github external
def __init__(self, name):
        """
        Create the user-info datasources, each named with the `name` prefix.

        :Parameters:
            name : `str`
                Prefix for every datasource name built here
                (e.g. ``name + ".editcount"``); also passed on to the parent
                initializer.
        """
        super().__init__(name)
        self.editcount = Datasource(name + ".editcount")
        "`int` : A count of edits the user has ever saved"
        self.registration_str = Datasource(name + ".registration_str")
        "`str` : The date the user registered in ISO format"
        # `registration` is derived from `registration_str` through
        # or_none(mwtypes.Timestamp).
        self.registration = Datasource(
            name + ".registration", or_none(mwtypes.Timestamp),
            depends_on=[self.registration_str])
        ":class:`mwtypes.Timestamp` : The date the user registered or None"
        self.groups = Datasource(name + ".groups")
        "`list` ( `str` ) : The groups the user is a member of"
        self.emailable = Datasource(name + ".emailable")
        "`bool` : `True` if the user is emailable, `False` otherwise"
        self.gender = Datasource(name + ".gender")
        "`str` : A string representing the user's ``gender`` preference."
github wikimedia / revscoring / revscoring / datasources / meta / dicts.py View on Github external
"""
These meta-datasources operate on :class:`revscoring.Datasource`'s that
return `dict`'s

.. autoclass:: revscoring.datasources.meta.dicts.keys

.. autoclass:: revscoring.datasources.meta.dicts.values

"""
from ..datasource import Datasource


class keys(Datasource):
    """
    Generates a set of `dict` keys

    :Parameters:
        dict_datasource : :class:`revscoring.Datasource`
            A datasource that generates a `dict`
        name : `str`
            A name for the new datasource.
    """

    def __init__(self, dict_datasource, name=None):
        name = self._format_name(name, [dict_datasource])
        super().__init__(name, self.process,
                         depends_on=[dict_datasource])

    def process(self, d):
github wikimedia / revscoring / revscoring / datasources / revision_oriented.py View on Github external
def __init__(self, name):
        """
        Create the user-info datasources, each named with the `name` prefix.

        :Parameters:
            name : `str`
                Prefix for every datasource name built here
                (e.g. ``name + ".editcount"``); also passed on to the parent
                initializer.
        """
        super().__init__(name)
        self.editcount = Datasource(name + ".editcount")
        "`int` : A count of edits the user has ever saved"
        self.registration_str = Datasource(name + ".registration_str")
        "`str` : The date the user registered in ISO format"
        # `registration` is derived from `registration_str` through
        # or_none(mwtypes.Timestamp).
        self.registration = Datasource(
            name + ".registration", or_none(mwtypes.Timestamp),
            depends_on=[self.registration_str])
        ":class:`mwtypes.Timestamp` : The date the user registered or None"
        self.groups = Datasource(name + ".groups")
        "`list` ( `str` ) : The groups the user is a member of"
        self.emailable = Datasource(name + ".emailable")
        "`bool` : `True` if the user is emailable, `False` otherwise"
        self.gender = Datasource(name + ".gender")
        "`str` : A string representing the user's ``gender`` preference."
github wikimedia / revscoring / revscoring / datasources / tokens_added.py View on Github external
from collections import namedtuple

from .datasource import Datasource
from .revision_diff import revision_diff


def process(revision_diff):
    """
    Flatten the items covered by the "insert" operations of a revision diff.

    `revision_diff` unpacks into ``(operations, a, b)``; for every operation
    named ``"insert"`` the items ``b[op.b1:op.b2]`` are appended, in order,
    to the result.

    :Returns:
        A flat `list` of the inserted items.
    """
    operations, _, b_items = revision_diff

    added = []
    for operation in operations:
        if operation.name == "insert":
            added.extend(b_items[operation.b1:operation.b2])
    return added

# Datasource "tokens_added": `process` applied to `revision_diff`.
tokens_added = Datasource(
    "tokens_added",
    process,
    depends_on=[revision_diff],
)
github wikimedia / revscoring / revscoring / datasources / user_info.py View on Github external
user_doc.get('userid'),
        user_doc.get('name'),
        user_doc.get('editcount'),
        registration,
        user_doc.get('groups', []),
        user_doc.get('implicitgroups', []),
        "emailable" in user_doc,
        user_doc.get('gender'),
        user_doc.get('block_id'),
        user_doc.get('blocked_by'),
        user_doc.get('blocked_by_id'),
        user_doc.get('block_reason'),
        user_doc.get('block_expiry')
    )

# Datasource "user_info": `process` applied to its single dependency,
# `user_doc`.
user_info = Datasource(
    "user_info",
    process,
    depends_on=[user_doc],
)
github wikimedia / revscoring / revscoring / datasources / meta / indexable.py View on Github external
"""
These meta-datasources operate on :class:`revscoring.Datasource`'s that
return `list`'s and `tuple`'s

.. autoclass:: revscoring.datasources.meta.indexable.index

"""
from ..datasource import Datasource


class index(Datasource):
    """
    Generates a datasource that returns the value that appears at `i`

    :Parameters:
        i : `int`
            The index of a value to return
        default : `mixed`
            The value to return if no value exists at `i`.  If not specified,
            an IndexError will be raised
        name : `str`
            A name for the new datasource.
    """

    def __init__(self, i, datasources, default=NotImplemented, name=None):
        name = self._format_name(name, [i, default])
        self.i = int(i)
github wikimedia / revscoring / revscoring / datasources / session_oriented.py View on Github external
rewrite_name : function
            A function to apply to the dependent's name when re-creating it.
        cache : dict(:class:`~revscoring.Feature` | :class:`~revscoring.FeatureVector` | :class:`~revscoring.Datasource`)
            A map of dependents that have already been converted.
    """

    new_name = rewrite_name(dependent.name)
    if new_name in cache:
        logger.debug("list_of_ify {0} in the cache".format(dependent.name))
        return cache[new_name]
    else:
        logger.debug("list_of_ify is modifying {0} into a list_of".format(dependent.name))
        new_dependencies = [list_of_ify(dependency, rewrite_name, cache)
                            for dependency in dependent.dependencies]

        if isinstance(dependent, Datasource):
            new_dependent = datasource_expanders.list_of(
                dependent, depends_on=new_dependencies, name=new_name)
        elif isinstance(dependent, FeatureVector):
            new_dependent = datasource_expanders.list_of(
                dependent, depends_on=new_dependencies, name=new_name)
        elif isinstance(dependent, Feature):
            new_dependent = feature_expanders.list_of(
                dependent, depends_on=new_dependencies, name=new_name)
        else:
            raise TypeError("Cannot convert type {0} into a list_of"
                            .format(type(dependent)))

        cache[new_name] = new_dependent
        return cache[new_name]
github wikimedia / revscoring / revscoring / datasources / revision_oriented.py View on Github external
include_page_creation=False,
                 include_page_suggested=False,
                 include_content=False):
        super().__init__(name)

        self.id = Datasource(name + ".id")
        "`int` : Revision ID"
        self.timestamp_str = Datasource(name + ".timestamp_str")
        "`str` : Timestamp the revision was saved in ISO format"
        self.timestamp = Datasource(
            name + ".timestamp", or_none(mwtypes.Timestamp),
            depends_on=[self.timestamp_str])
        ":class:`mwtypes.Timestamp`: Timestamp the revision was saved"
        self.comment = Datasource(name + ".comment")
        "`str` : The comment saved with the revision"
        self.byte_len = Datasource(name + ".byte_length")
        "`int` : The length of the revision content in bytes"
        self.minor = Datasource(name + ".minor")
        "`bool` : Was the revision flagged as minor?"
        self.content_model = Datasource(name + ".content_model")
        "`str` : Describes the format of revision content"

        if include_content:
            self.text = Datasource(name + ".text")
            "`str` : The decoded (Unicode) text of the revision content"

        if include_parent:
            self.parent = Revision(
                name + ".parent",
                include_parent=False,
                include_user_info=False,
                include_page=False,