How to use the revscoring.dependencies.DependentSet class in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / tests / test_errors.py View on Github external
pickle.loads(pickle.dumps(pnf))
    assert str(pnf) == "PageNotFound: Could not find page ({page}:12)"

    unf = UserNotFound(DependentSet("user"), 10)
    pickle.loads(pickle.dumps(unf))
    assert str(unf) == "UserNotFound: Could not find user account ({user}:10)"

    ud = UserDeleted(DependentSet("revision"))
    pickle.loads(pickle.dumps(ud))
    assert str(ud) == "UserDeleted: User deleted ({revision})"

    cd = CommentDeleted(DependentSet("revision"))
    pickle.loads(pickle.dumps(cd))
    assert str(cd) == "CommentDeleted: Comment deleted ({revision})"

    td = TextDeleted(DependentSet("revision"))
    pickle.loads(pickle.dumps(td))
    assert str(td) == "TextDeleted: Text deleted ({revision})"

    cde = CaughtDependencyError("Test", RuntimeError("Foo"))
    pickle.loads(pickle.dumps(cde))
    assert str(cde) == "RuntimeError: Test\nNone"
github wikimedia / revscoring / tests / datasources / test_session_oriented.py View on Github external
def test_list_of_tree():
    class TestThing(DependentSet):
        """
        Small dependent set used by the test: a ``text`` datasource, a
        ``len`` datasource derived from it, and a ``contains`` helper.
        """

        def __init__(self, name):
            super().__init__(name)
            text_name = name + ".text"
            self.text = Datasource(text_name)
            # ``len`` is computed from ``text`` via ``_process_len``.
            self.len = Datasource(
                text_name + ".len",
                self._process_len,
                depends_on=[self.text],
            )

        @staticmethod
        def _process_len(text):
            # Plain length of the value supplied for ``text``.
            return len(text)

        @DependentSet.meta_dependent
        def contains(self, value):
            # The generated name embeds the repr() of the searched value.
            feature_name = self.name + ".text.contains(" + repr(value) + ")"
            return contains(self.text, value, name=feature_name)
github wikimedia / revscoring / tests / test_errors.py View on Github external
assert str(
        rnf) == "RevisionNotFound: Could not find revision ({revision}:10)"

    pnf = PageNotFound(DependentSet("page"), 12)
    pickle.loads(pickle.dumps(pnf))
    assert str(pnf) == "PageNotFound: Could not find page ({page}:12)"

    unf = UserNotFound(DependentSet("user"), 10)
    pickle.loads(pickle.dumps(unf))
    assert str(unf) == "UserNotFound: Could not find user account ({user}:10)"

    ud = UserDeleted(DependentSet("revision"))
    pickle.loads(pickle.dumps(ud))
    assert str(ud) == "UserDeleted: User deleted ({revision})"

    cd = CommentDeleted(DependentSet("revision"))
    pickle.loads(pickle.dumps(cd))
    assert str(cd) == "CommentDeleted: Comment deleted ({revision})"

    td = TextDeleted(DependentSet("revision"))
    pickle.loads(pickle.dumps(td))
    assert str(td) == "TextDeleted: Text deleted ({revision})"

    cde = CaughtDependencyError("Test", RuntimeError("Foo"))
    pickle.loads(pickle.dumps(cde))
    assert str(cde) == "RuntimeError: Test\nNone"
github wikimedia / revscoring / revscoring / languages / features / dictionary / features.py View on Github external
if hasattr(self.datasources, 'parent'):
            self.parent = Revision(name + ".parent", self.datasources.parent)
            """
            :class:`~revscoring.languages.features.dictionary.Revision` : The
            parent revision
            """

        if hasattr(self.datasources, 'diff'):
            self.diff = Diff(name + ".diff", self.datasources.diff)
            """
            :class:`~revscoring.languages.features.dictionary.Diff` : The
            diff between the parent and current revision.
            """


class Diff(DependentSet):

    def __init__(self, name, diff_datasources):
        super().__init__(name)
        self.datasources = diff_datasources

        # Simple counts (based on wikitext.edit.diff)
        self.dict_words_added = \
            aggregators.len(self.datasources.dict_words_added)
        "`int` : A count of the number of dictionary words added"
        self.dict_words_removed = \
            aggregators.len(self.datasources.dict_words_removed)
        "`int` : A count of the number of dictionary words removed"
        self.non_dict_words_added = \
            aggregators.len(self.datasources.non_dict_words_added)
        "`int` : A count of the number of non-dictionary words added"
        self.non_dict_words_removed = \
github wikimedia / revscoring / revscoring / features / temporal / revision_oriented.py View on Github external
class ParentRevision(Revision):
    """
    Represents a parent revision

    Initialized from ``revision_datasources.parent`` and extended with a
    ``seconds_since`` feature.
    """

    def __init__(self, name, revision_datasources):
        parent_datasources = revision_datasources.parent
        super().__init__(name, parent_datasources)

        # Dependencies: the parent's timestamp, then this revision's.
        timestamps = [parent_datasources.timestamp,
                      revision_datasources.timestamp]
        self.seconds_since = Feature(
            name + ".seconds_since",
            _process_seconds_since,
            returns=int,
            depends_on=timestamps,
        )
        "`int` : The number of seconds since the parent revision was saved."


class User(DependentSet):
    "Represents a revision user"

    def __init__(self, name, revision_datasources):
        super().__init__(name)
        self.datasources = revision_datasources.user

        if hasattr(self.datasources, 'info'):
            self.seconds_since_registration = Feature(
                name + ".seconds_since_registration",
                _process_seconds_since_registration,
                returns=int,
                depends_on=[revision_datasources.user.id,
                            revision_datasources.user.info.registration,
                            revision_datasources.timestamp])
            """
            `int` : The number of seconds since the user registered their
github wikimedia / revscoring / revscoring / datasources / revision_oriented.py View on Github external
class Namespace(DependentSet):
    """
    Represents a page's namespace through two datasources, ``id`` and
    ``name``, each named by appending a suffix to the set's name.
    """

    def __init__(self, name):
        super().__init__(name)

        id_source = Datasource(name + ".id")
        name_source = Datasource(name + ".name")

        self.id = id_source
        "`int` : The namespace's ID"
        self.name = name_source
        "`str` : The name of the namespace"


class Diff(DependentSet):
    """
    Represents the difference between two sequential revisions.

    This class defines no datasources of its own here; its initializer only
    forwards ``name`` to the :class:`DependentSet` initializer.

    :Parameters:
        name : `str`
            The name given to this dependent set.
    """

    def __init__(self, name):
        super().__init__(name)


# Module-level datasource set named "revision", constructed with the
# page-creation, content, user-last-revision and page-suggested options
# all switched on.
revision = Revision(
    "revision",
    include_page_creation=True,
    include_content=True,
    include_user_last_revision=True,
    include_page_suggested=True
)
"""
github wikimedia / revscoring / revscoring / features / temporal / revision_oriented.py View on Github external
class Page(DependentSet):
    """
    Represents a revision's page

    Holds a single nested set, ``creation``, built from the same revision
    datasources that were passed to this set.
    """

    def __init__(self, name, revision_datasources):
        super().__init__(name)

        creation_name = name + ".creation"
        self.creation = PageCreation(creation_name, revision_datasources)
        """
        :class:`~revscoring.features.temporal.PageCreation` : The first
        revision of the page
        """


class PageCreation(DependentSet):
    """
    Represents a page's creating revision

    Exposes ``seconds_since``, which depends on the page-creation timestamp
    and the revision timestamp.
    """

    def __init__(self, name, revision_datasources):
        super().__init__(name)

        creation_timestamp = revision_datasources.page.creation.timestamp
        revision_timestamp = revision_datasources.timestamp

        self.seconds_since = Feature(
            name + ".seconds_since",
            _process_seconds_since,
            returns=int,
            depends_on=[creation_timestamp, revision_timestamp],
        )
        "`int`: The number of seconds since the page was created"


def _process_day_of_week(timestamp):
    if timestamp is None:
        return 7  # The day after Sunday.
github wikimedia / revscoring / revscoring / extractors / api / revision_oriented.py View on Github external
property_suggestion_doc = \
                extractor.get_property_suggestion_search_doc(page)
            self.properties = Datasource(
                page.suggested.properties.name, identity,
                depends_on=[property_suggestion_doc])


class Namespace(DependentSet):
    """
    Extractor-side counterpart of a namespace dependent set.

    Reuses the names of the supplied ``namespace`` set for its own ``id``
    and ``name`` datasources.  ``extractor`` is accepted but not used by
    this initializer.
    """

    def __init__(self, namespace, extractor, rev_doc, namespace_title):
        super().__init__(namespace._name)

        # ``id`` is built by ``key`` from the ['page', 'ns'] path on rev_doc.
        ns_path = ['page', 'ns']
        self.id = key(ns_path, rev_doc, name=namespace.id.name)

        # ``name`` applies ``first`` to the ``namespace_title`` dependency.
        self.name = Datasource(
            namespace.name.name,
            first,
            depends_on=[namespace_title],
        )


class RevisionUser(DependentSet):
    """
    Extractor-side set for a revision's user.

    ``id`` and ``text`` are keyed out of ``rev_doc`` ('userid' and 'user');
    both pass ``(UserDeleted, revision.user)`` as ``if_missing``.  ``info``
    and ``last_revision`` are only attached when ``revision.user`` has the
    corresponding attribute.
    """

    def __init__(self, revision, extractor, rev_doc):
        user = revision.user
        super().__init__(user._name)

        on_missing = (UserDeleted, user)
        self.id = key('userid', rev_doc, name=user.id.name,
                      if_missing=on_missing)
        self.text = key('user', rev_doc, name=user.text.name,
                        if_missing=on_missing)

        if hasattr(user, 'info'):
            self.info = RevisionUserInfo(user, extractor)

        if hasattr(user, 'last_revision'):
            # The user's last revision is read from its own document.
            last_rev_doc = extractor.get_last_user_rev_doc(revision)
            self.last_revision = Revision(
                user.last_revision, extractor, last_rev_doc)
github wikimedia / revscoring / revscoring / features / revision_oriented.py View on Github external
regex : `str` | `re.compile`
                The regex to match.  Case-insensitive by default.
            name : `str`
                A name for the new feature.
        """
        if not hasattr(regex, 'pattern'):
            regex = re.compile(regex, re.I)

        if name is None:
            name = "{0}({1})".format(self._name + ".name_matches",
                                     repr(regex.pattern))

        return bools.regex_match(regex, self.datasources.name, name=name)


class User(DependentSet):

    def __init__(self, name, user_datasources):
        """
        Store the user datasources and build the ``is_anon`` feature, which
        depends on the user ``id`` datasource and returns a `bool`.
        """
        super().__init__(name)
        self.datasources = user_datasources

        anon_feature = Feature(
            self._name + ".is_anon",
            _process_is_anon,
            returns=bool,
            depends_on=[self.datasources.id],
        )
        self.is_anon = anon_feature

    def id_in_set(self, ids, name=None):
        """
        Generates a :class:`revscoring.Feature` that returns True the
        user's ID appears within the provided set of IDs.

        :Parameters:
            ids : `set` ( `int` )
                A set of IDs to match against the user's ID
github wikimedia / revscoring / revscoring / datasources / revision_oriented.py View on Github external
"""
            :class:`~revscoring.datasources.revision_oriented.User` : The
            user who saved the revision.
            """

        if include_content and include_parent:
            self.diff = Diff(
                name + ".diff"
            )
            """
            :class:`~revscoring.datasources.revision_oriented.Diff` : The
            difference between this revision and the parent revision.
            """


class User(DependentSet):
    """
    Represents a user's id and name/ip
    """

    def __init__(self, name, include_info=True,
                 include_last_revision=False):
        super().__init__(name)
        self.id = Datasource(name + ".id")
        "`int` : The id of the user who saved the edit.  0 for IPs."
        self.text = Datasource(name + ".text")
        "`str` : The user's name or IP address"

        if include_info:
            self.info = UserInfo(name + ".info")
            """
            :class:`~revscoring.datasources.revision_oriented.UserInfo` :