How to use the revscoring.datasources.revision_oriented module in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / tests / languages / test_galician.py View on Github external
def test_dictionary():
    """Check Galician dictionary word classification and pickling."""
    cache = {revision_oriented.revision.text: 'táboa períodos worngly.'}
    datasources = galician.dictionary.revision.datasources

    # Tokens that the dictionary recognizes.
    dict_words = solve(datasources.dict_words, cache=cache)
    assert dict_words == ['táboa', 'períodos']

    # Tokens that the dictionary does not recognize.
    non_dict_words = solve(datasources.non_dict_words, cache=cache)
    assert non_dict_words == ["worngly"]

    # The dictionary must compare equal to its pickle round-trip.
    restored = pickle.loads(pickle.dumps(galician.dictionary))
    assert galician.dictionary == restored
github wikimedia / revscoring / tests / languages / test_hindi.py View on Github external
@mark.nottravis
def test_dictionary():
    """Check Hindi dictionary word classification and pickling."""
    cache = {revision_oriented.revision.text: 'पहनाया उनकी कविताओं worngly.'}
    datasources = hindi.dictionary.revision.datasources

    # Tokens that the dictionary recognizes.
    known = solve(datasources.dict_words, cache=cache)
    assert known == ["पहनाया", "उनकी"]

    # Tokens that the dictionary does not recognize.
    unknown = solve(datasources.non_dict_words, cache=cache)
    assert unknown == ["कविताओं", "worngly"]

    # The dictionary must compare equal to its pickle round-trip.
    restored = pickle.loads(pickle.dumps(hindi.dictionary))
    assert hindi.dictionary == restored
github wikimedia / revscoring / tests / languages / util.py View on Github external
def process(text):
        """Solve ``extractor`` with *text* supplied as the revision text."""
        text_cache = {ro.revision.text: text}
        return solve(extractor, cache=text_cache)
github wikimedia / revscoring / tests / languages / test_icelandic.py View on Github external
def test_stopwords():
    """Check Icelandic stopword classification and pickling."""
    text = "belgíska konungsríkisins auk verndarsvæði honum hann"
    cache = {revision_oriented.revision.text: text}
    datasources = icelandic.stopwords.revision.datasources

    # Tokens classified as stopwords.
    stopwords = solve(datasources.stopwords, cache=cache)
    assert stopwords == ["auk", "honum", "hann"]

    # Remaining tokens, i.e. those not classified as stopwords.
    non_stopwords = solve(datasources.non_stopwords, cache=cache)
    assert non_stopwords == ["belgíska", "konungsríkisins", "verndarsvæði"]

    # The stopword set must compare equal to its pickle round-trip.
    restored = pickle.loads(pickle.dumps(icelandic.stopwords))
    assert icelandic.stopwords == restored
github wikimedia / revscoring / tests / languages / test_polish.py View on Github external
def test_dictionary():
    """Check Polish dictionary word classification and pickling."""
    text = 'obrębie znamion barwnikowych  worngly.'
    cache = {revision_oriented.revision.text: text}
    datasources = polish.dictionary.revision.datasources

    # Tokens that the dictionary recognizes.
    recognized = solve(datasources.dict_words, cache=cache)
    assert recognized == ["obrębie", "znamion", "barwnikowych"]

    # Tokens that the dictionary does not recognize.
    unrecognized = solve(datasources.non_dict_words, cache=cache)
    assert unrecognized == ["worngly"]

    # The dictionary must compare equal to its pickle round-trip.
    restored = pickle.loads(pickle.dumps(polish.dictionary))
    assert polish.dictionary == restored
github wikimedia / revscoring / revscoring / extractors / api / extractor.py View on Github external
parent_id = rev_cache.get(
                            revision_oriented.revision.parent.id,
                            rev_doc.get('parentid'))
                        parentids_to_lookup.append(parent_id)

                logger.info("Batch requesting {0} revision.parent from the API"
                            .format(len(parentids_to_lookup)))
                parent_rev_docs = self.get_rev_doc_map(parentids_to_lookup,
                                                       rvprop=rvprop)

                for rev_id, rev_cache in caches.items():
                    if self.revision.doc in rev_cache and \
                       self.revision.parent.doc not in rev_cache:
                        rev_doc = rev_cache[self.revision.doc]
                        parent_id = rev_cache.get(
                            revision_oriented.revision.parent.id,
                            rev_doc.get('parentid'))

                        if parent_id in parent_rev_docs:
                            rev_cache[self.revision.parent.doc] = \
                                parent_rev_docs[parent_id]
                        elif parent_id == 0:
                            rev_cache[self.revision.parent.doc] = None
                        else:
                            errored[rev_id] = \
                                RevisionNotFound(self.revision.parent,
                                                 parent_id)

            if self.revision.user.info & all_dependents:
                user_texts_to_lookup = set()
                for rev_id, rev_cache in caches.items():
                    if self.revision.doc in rev_cache and \
github wikimedia / revscoring / revscoring / features / bytes / revision_oriented.py View on Github external
)
        "`int` : The length of the revision content in bytes"

        if hasattr(revision_datasources, "parent"):
            self.parent = Revision(
                name + ".parent",
                revision_datasources.parent
            )
            """
            :class:`revscoring.features.bytes.Revision` : The
            parent (aka "previous") revision of the page.
            """


# Module-level ``Revision`` instance, built from a ``datasources.Revision``
# that wraps ``revision_oriented.revision``; both are given the same ``name``.
revision = Revision(name,
                    datasources.Revision(name, revision_oriented.revision))
"""
Represents the base revision of interest.  Implements this a basic structure: