How to use the revscoring.datasources.Datasource class in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / revscoring / extractors / api / datasources.py View on Github external
return None

        rvprop = set(REV_PROPS)

        if self.revision.text in dependents:
            rvprop.add('content')

        rev_doc_map = self.extractor.get_rev_doc_map([rev_id], rvprop=rvprop)

        if rev_id not in rev_doc_map:
            raise RevisionNotFound(self.revision, rev_id=rev_id)
        else:
            return rev_doc_map[rev_id]


class PageCreationRevDoc(Datasource):

    def __init__(self, page, extractor):
        """
        Keep references to `page` and `extractor`, then register this
        datasource under the page creation's `_name` suffixed with ".doc".

        The datasource is processed by `self.process` and depends on
        `page.id` and `extractor.dependents`, in that order.
        """
        self.page = page
        self.extractor = extractor

        doc_name = page.creation._name + ".doc"
        processor = self.process
        dependencies = [page.id, extractor.dependents]
        super().__init__(doc_name, processor, depends_on=dependencies)

    def process(self, page_id, dependents):
        rvprop = set(REV_PROPS)

        if self.page.creation.text in dependents:
            rvprop.add('content')

        rev_doc = self.extractor.get_page_creation_doc(page_id, rvprop=rvprop)

        # If we didn't find a revision for page creation, this is bad.  Error.
github wikimedia / revscoring / revscoring / features / wikitext / tokenized / tokenized.py View on Github external
def tokenized(text_datasource, name=None):
    """
    Builds a `Datasource` that generates a list of tokens from
    `text_datasource`.

    When `name` is None, the datasource is named after the text datasource
    it wraps, in the form "tokenized(<text_datasource>)".
    """
    datasource_name = (
        "{0}({1})".format("tokenized", text_datasource)
        if name is None else name
    )
    return Datasource(
        datasource_name, process_tokens, depends_on=[text_datasource]
    )
github wikimedia / revscoring / revscoring / extractors / api / revision_oriented.py View on Github external
def __init__(self, namespace, extractor, rev_doc, namespace_title):
        """
        Set up the `id` and `name` datasources for a namespace.

        `id` is built by `key` from the ['page', 'ns'] path and `rev_doc`;
        `name` is a `Datasource` processed by `first` that depends on
        `namespace_title`.  Both reuse the names of the matching members
        of `namespace`.  `extractor` is accepted but not used here.
        """
        super().__init__(namespace._name)

        # presumably `key` walks this path inside the rev doc -- confirm
        # against the definition of `key`.
        id_path = ['page', 'ns']
        self.id = key(id_path, rev_doc, name=namespace.id.name)

        self.name = Datasource(
            namespace.name.name, first, depends_on=[namespace_title]
        )
github wikimedia / revscoring / revscoring / features / wikibase / datasources / diff.py View on Github external
)
        self.statements_removed = Datasource(
            name + ".statements_removed", _process_statements_removed,
            depends_on=[self.properties_diff, self.parent_entity,
                        self.revision_entity]
        )
        self.claims_removed = Datasource(  # Backwards compatible
            name + ".claims_removed", _identity,
            depends_on=[self.statements_removed]
        )
        self.statements_changed = Datasource(
            name + ".statements_changed", _process_statements_changed,
            depends_on=[self.properties_diff, self.parent_entity,
                        self.revision_entity]
        )
        self.claims_changed = Datasource(  # Backwards compatible
            name + ".claims_changed", _identity,
            depends_on=[self.statements_changed]
        )
        self.sources_added = Datasource(
            name + ".sources_added", _process_sources_added,
            depends_on=[self.claims_changed]
        )
        self.sources_removed = Datasource(
            name + ".sources_removed", _process_sources_removed,
            depends_on=[self.claims_changed]
        )
        self.qualifiers_added = Datasource(
            name + ".qualifiers_added", _process_qualifiers_added,
            depends_on=[self.claims_changed]
        )
        self.qualifiers_removed = Datasource(
github wikimedia / revscoring / revscoring / features / wikibase / datasources / diff.py View on Github external
depends_on=[self.statements_removed]
        )
        self.statements_changed = Datasource(
            name + ".statements_changed", _process_statements_changed,
            depends_on=[self.properties_diff, self.parent_entity,
                        self.revision_entity]
        )
        self.claims_changed = Datasource(  # Backwards compatible
            name + ".claims_changed", _identity,
            depends_on=[self.statements_changed]
        )
        self.sources_added = Datasource(
            name + ".sources_added", _process_sources_added,
            depends_on=[self.claims_changed]
        )
        self.sources_removed = Datasource(
            name + ".sources_removed", _process_sources_removed,
            depends_on=[self.claims_changed]
        )
        self.qualifiers_added = Datasource(
            name + ".qualifiers_added", _process_qualifiers_added,
            depends_on=[self.claims_changed]
        )
        self.qualifiers_removed = Datasource(
            name + ".qualifiers_removed", _process_qualifiers_removed,
            depends_on=[self.claims_changed]
        )

        # badges
        self.badges_diff = Datasource(
            name + ".badges_diff", diff_dicts,
            depends_on=[revision_datasources.parent.badges,
github wikimedia / revscoring / revscoring / features / wikibase / datasources / diff.py View on Github external
)
        self.sources_removed = Datasource(
            name + ".sources_removed", _process_sources_removed,
            depends_on=[self.claims_changed]
        )
        self.qualifiers_added = Datasource(
            name + ".qualifiers_added", _process_qualifiers_added,
            depends_on=[self.claims_changed]
        )
        self.qualifiers_removed = Datasource(
            name + ".qualifiers_removed", _process_qualifiers_removed,
            depends_on=[self.claims_changed]
        )

        # badges
        self.badges_diff = Datasource(
            name + ".badges_diff", diff_dicts,
            depends_on=[revision_datasources.parent.badges,
                        revision_datasources.badges]
        )
        self.badges_added, self.badges_removed, self.badges_changed = \
            diff_parts(name + ".badges", self.badges_diff)
github wikimedia / revscoring / revscoring / features / wikibase / datasources / revision_oriented.py View on Github external
self.claims = Datasource(
            name + ".claim", _process_claims, depends_on=[self.entity]
        )
        """
        A `set` of unique claims in the revision
        """

        self.sources = Datasource(
            name + ".sources", _process_sources, depends_on=[self.entity]
        )
        """
        A `set` of unique sources in the revision
        """

        self.reference_claims = Datasource(
            name + ".reference_claims", _process_ref_claims,
            depends_on=[self.entity]
        )
        """
        A `set` of unique reference claims in the revision
        """

        self.qualifiers = Datasource(
            name + ".qualifiers", _process_qualifiers, depends_on=[self.entity]
        )
        """
        A `set` of unique qualifiers in the revision
        """

        self.badges = Datasource(
            name + ".badges", _process_badges, depends_on=[self.entity]
github wikimedia / revscoring / revscoring / features / wikitext / datasources / parsed.py View on Github external
def filter(self, heading):
        """Report whether `heading.level` equals this object's `level`."""
        heading_level = heading.level
        return heading_level == self.level


class get_key(Datasource):
    """
    Datasource that looks up one fixed key in the value produced by
    `dict_datasource`, returning `default` when the key is absent.
    """

    def __init__(self, key, dict_datasource, default=None, name=None):
        self.key, self.default = key, default
        # The base class formats the final name from `name` and the
        # dependency list.
        resolved_name = self._format_name(name, [dict_datasource])
        super().__init__(
            resolved_name, self.process, depends_on=[dict_datasource]
        )

    def process(self, d):
        """Return `d.get(key, default)` for the configured key/default."""
        lookup = d.get
        return lookup(self.key, self.default)


class execute_method(Datasource):
    """
    Datasource that calls the method named `method_name` on the value
    produced by `object_datasource`, passing the stored `args` and
    `kwargs`, and yields the call's return value.
    """

    def __init__(self, method_name, object_datasource, args=None, kwargs=None,
                 name=None):
        self.method_name = method_name
        self.args = args
        self.kwargs = kwargs
        resolved_name = self._format_name(name, [object_datasource])
        super().__init__(
            resolved_name, self.process, depends_on=[object_datasource]
        )

    def process(self, object):
        """Invoke the configured method on `object` and return its result."""
        # Falsy args/kwargs (including None) fall back to empty containers.
        positional = self.args
        if not positional:
            positional = []
        keyword = self.kwargs
        if not keyword:
            keyword = {}

        method = getattr(object, self.method_name)
        return method(*positional, **keyword)
github wikimedia / revscoring / revscoring / features / wikibase / datasources / revision_oriented.py View on Github external
self.reference_claims = Datasource(
            name + ".reference_claims", _process_ref_claims,
            depends_on=[self.entity]
        )
        """
        A `set` of unique reference claims in the revision
        """

        self.qualifiers = Datasource(
            name + ".qualifiers", _process_qualifiers, depends_on=[self.entity]
        )
        """
        A `set` of unique qualifiers in the revision
        """

        self.badges = Datasource(
            name + ".badges", _process_badges, depends_on=[self.entity]
        )
        """
        A `set` of unique badges in the revision
        """

        if hasattr(revision_datasources, "parent") and \
           hasattr(revision_datasources.parent, "text"):
            self.parent = Revision(
                name + ".parent",
                revision_datasources.parent
            )

            if hasattr(revision_datasources, "diff"):
                self.diff = Diff(name + ".diff", self)
github wikimedia / revscoring / revscoring / features / wikitext / datasources / parsed.py View on Github external
return str(tag.tag)


def _extract_template_name(template):
    """Return the template's `name` attribute converted to `str`."""
    template_name = template.name
    return str(template_name)


class HeadingOfLevel:
    """Predicate object that matches headings of one specific level."""

    def __init__(self, level):
        # Coerce once so `filter` always compares against an int.
        level_number = int(level)
        self.level = level_number

    def filter(self, heading):
        """Report whether `heading.level` equals the configured level."""
        heading_level = heading.level
        return heading_level == self.level


class get_key(Datasource):
    """
    Datasource that looks up one fixed key in the value produced by
    `dict_datasource`, returning `default` when the key is absent.
    """

    def __init__(self, key, dict_datasource, default=None, name=None):
        self.key, self.default = key, default
        # The base class formats the final name from `name` and the
        # dependency list.
        resolved_name = self._format_name(name, [dict_datasource])
        super().__init__(
            resolved_name, self.process, depends_on=[dict_datasource]
        )

    def process(self, d):
        """Return `d.get(key, default)` for the configured key/default."""
        lookup = d.get
        return lookup(self.key, self.default)


class execute_method(Datasource):
    def __init__(self, method_name, object_datasource, args=None, kwargs=None,
                 name=None):
        self.method_name = method_name
        self.args = args
        self.kwargs = kwargs