How to use the revscoring.datasources.revision_oriented.revision.text function in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / tests / languages / test_french.py View on Github external
"lol", "lool",
    "mdr", "mdrr",
    "moche",
    "ouai",
    "ouais",
    "ptdr",
    "truc",
    "voila",
    "voulez"
]

# Fixture handed to compare_extraction as its third argument by the
# badwords and informals tests in this module.
# NOTE(review): presumably words that must NOT be extracted as matches --
# confirm against compare_extraction's contract.
OTHER = [
    "connection", "fitness", "le"
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Compare French badword extraction and check pickling."""
    matcher = french.badwords.revision.datasources.matches
    compare_extraction(matcher, BAD, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(french.badwords))
    assert french.badwords == restored


def test_informals():
    """Compare French informal-word extraction and check pickling."""
    matcher = french.informals.revision.datasources.matches
    compare_extraction(matcher, INFORMAL, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(french.informals))
    assert french.informals == restored
github wikimedia / revscoring / tests / languages / test_albanian.py View on Github external
"yeah",
    "yes",
    "yolo",
]

# Fixture handed to compare_extraction as its third argument by the
# badwords and informals tests in this module: one multi-line passage.
# NOTE(review): presumably ordinary prose that must NOT yield any matches --
# confirm against compare_extraction's contract.
OTHER = [
    """
    Kirenea (greqishtja e lashtë: Κυρήνη Kyrēnē) ka qenë një qytet antik g
    rek dhe romak pranë qytetit të sotëm Shahhat, Libi. Ai ishte qyteti
    më i vjetër dhe më i rëndësishëm nga pesë qytetet greke në rajon. Ai i
    dha Libisë lindore emrin klasik Cyrenaica që ajo ka ruajtur deri në
    kohët moderne.
    """
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Compare Albanian badword extraction and check pickling."""
    matcher = albanian.badwords.revision.datasources.matches
    compare_extraction(matcher, BAD, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(albanian.badwords))
    assert albanian.badwords == restored


def test_informals():
    """Compare Albanian informal-word extraction and check pickling."""
    matcher = albanian.informals.revision.datasources.matches
    compare_extraction(matcher, INFORMAL, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(albanian.informals))
    assert albanian.informals == restored
github wikimedia / revscoring / tests / languages / test_polish.py View on Github external
def test_stopwords():
    """Check Polish stopword / non-stopword splitting and pickling."""
    # Pre-seed the revision text so solve() works from this sentence.
    cache = {revision_oriented.revision.text: 'być barwnikowych pomocniczą'}
    datasources = polish.stopwords.revision.datasources

    found = solve(datasources.stopwords, cache=cache)
    assert found == ['być']

    remaining = solve(datasources.non_stopwords, cache=cache)
    assert remaining == ['barwnikowych', 'pomocniczą']

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(polish.stopwords))
    assert polish.stopwords == restored
github wikimedia / revscoring / tests / languages / test_albanian.py View on Github external
def test_stopwords():
    """Check Albanian stopword / non-stopword splitting and pickling."""
    # Pre-seed the revision text so solve() works from this sentence.
    text = ("Ai ishte qyteti më i vjetër dhe më "
            "pesë qytetet greke në rajon")
    cache = {revision_oriented.revision.text: text}
    datasources = albanian.stopwords.revision.datasources

    found = solve(datasources.stopwords, cache=cache)
    assert found == ["ishte", "dhe"]

    remaining = solve(datasources.non_stopwords, cache=cache)
    assert remaining == [
        "Ai", "qyteti", "më", "i", "vjetër", "më", "pesë",
        "qytetet", "greke", "në", "rajon",
    ]

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(albanian.stopwords))
    assert albanian.stopwords == restored
github wikimedia / revscoring / tests / languages / test_spanish.py View on Github external
"wey",
    "XD", "xdd", "xddddd"
]

# Fixture handed to compare_extraction as its third argument by the
# badwords and informals tests in this module: one multi-line passage.
# NOTE(review): presumably ordinary prose that must NOT yield any matches --
# confirm against compare_extraction's contract.
OTHER = [
    """
    Su cuerpo es largo y estilizado, de un color gris parduzco, menos en su
    parte inferior, que es blanquecina. Existen dos subespecies diferenciadas:
    el rorcual del norte, que tiene su hábitat en el Atlántico Norte, y el
    rorcual antártico, de mayor tamaño, que vive habitualmente en aguas del
    océano Antártico. Puede verse en los principales océanos del planeta,
    desde las aguas polares a las tropicales.
    """,
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Compare Spanish badword extraction and check pickling."""
    matcher = spanish.badwords.revision.datasources.matches
    compare_extraction(matcher, BAD, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(spanish.badwords))
    assert spanish.badwords == restored


def test_informals():
    """Compare Spanish informal-word extraction and check pickling."""
    matcher = spanish.informals.revision.datasources.matches
    compare_extraction(matcher, INFORMAL, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(spanish.informals))
    assert spanish.informals == restored
github wikimedia / revscoring / tests / languages / test_estonian.py View on Github external
def test_stopwords():
    """Check Estonian stopword / non-stopword splitting and pickling."""
    # Pre-seed the revision text so solve() works from this sentence.
    text = ("Bergi ja Gerdruta Wilhelmine "
            "von Ermesi vanima pojana.")
    cache = {revision_oriented.revision.text: text}
    datasources = estonian.stopwords.revision.datasources

    found = solve(datasources.stopwords, cache=cache)
    assert found == ["von"]

    remaining = solve(datasources.non_stopwords, cache=cache)
    assert remaining == [
        "Bergi", "ja", "Gerdruta", "Wilhelmine", "Ermesi", "vanima",
        "pojana",
    ]

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(estonian.stopwords))
    assert estonian.stopwords == restored
github wikimedia / revscoring / tests / languages / test_latvian.py View on Github external
def test_stopwords():
    """Check Latvian stopword / non-stopword splitting and pickling."""
    # Pre-seed the revision text so solve() works from this sentence.
    cache = {revision_oriented.revision.text: 'novirze būt vidējiem'}
    datasources = latvian.stopwords.revision.datasources

    found = solve(datasources.stopwords, cache=cache)
    assert found == ['būt']

    remaining = solve(datasources.non_stopwords, cache=cache)
    assert remaining == ['novirze', 'vidējiem']

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(latvian.stopwords))
    assert latvian.stopwords == restored
github wikimedia / revscoring / tests / languages / test_norwegian.py View on Github external
"yeah"
]

# Fixture handed to compare_extraction as its third argument by the
# badwords and informals tests in this module: one multi-line passage.
# NOTE(review): presumably ordinary prose that must NOT yield any matches --
# confirm against compare_extraction's contract.
OTHER = [
    """
    Moulana Jalalod-din Balkhi Mohammad Rumi (født 30. september 1207 i Balkh,
    død 17. desember 1273 i Konya i daværende Persia) var en dikter,
    jurist, mystiker og teolog av tyrkisk, tadsjikisk eller persisk
    opprinnelse. Hans tilhengere stiftet den sufistiske Mevlevi-ordenen,
    kjent som De dansende dervisjer. Han skrev også flere bøker, hvorav den
    mest kjente er Masnavi-ye ma'navi, en samling av lignelser i diktform,
    ofte omtalt som «Koranen på persisk tungemål» (qorân dar zabân-e pahlavi).
    """
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Compare Norwegian badword extraction and check pickling."""
    matcher = norwegian.badwords.revision.datasources.matches
    compare_extraction(matcher, BAD, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(norwegian.badwords))
    assert norwegian.badwords == restored


def test_informals():
    """Compare Norwegian informal-word extraction and check pickling."""
    matcher = norwegian.informals.revision.datasources.matches
    compare_extraction(matcher, INFORMAL, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(norwegian.informals))
    assert norwegian.informals == restored
github wikimedia / revscoring / tests / languages / test_greek.py View on Github external
# Fixture handed to compare_extraction as its third argument by the
# badwords and informals tests in this module: one multi-line passage.
# NOTE(review): presumably ordinary prose that must NOT yield any matches --
# confirm against compare_extraction's contract.
OTHER = [
    """
    Η Μήδεια είναι όπερα κομίκ σε τρεις πράξεις του Λουίτζι Κερουμπίνι.
    Το λιμπρέτο του Φρανσουά-Μπενουά Οφμάν είναι βασισμένο στην ομώνυμη
    τραγωδία του Ευριπίδη, Μήδεια, καθώς και στο θεατρικό έργο Μήδεια του Πιέρ
    Κορνέιγ. Παρουσιάστηκε πρώτη φορά στις 17 Μαρτίου 1797 στο θέατρο Φεντώ
    στο Παρίσι με τη Γαλλίδα υψίφωνο Ζιλί-Ανζελίκ Σιό στο ρόλο της Μήδειας.
    Είναι ένα από τα πιο γνωστά έργα του Κερουμπίνι και το μόνο που παίζεται
    τακτικά έως σήμερα. Η όπερα, αν και στην πρωτότυπη εκδοχή ήταν στα γαλλικά
    και συμπεριελάμβανε διαλόγους δίχως συνοδεία μουσικής, έγινε γνωστή τον
    περασμένο αιώνα με την Ιταλική εκδοχή του λιμπρέτου του Οφμάν και των
    ρετσιτατίβι του Φραντς Λάχνερ από τον Κάρλο Τσανγκαρίνι.
    """
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Compare Greek badword extraction and check pickling.

    Runs compare_extraction on the badwords ``matches`` datasource with the
    module-level BAD and OTHER fixtures, then asserts that ``greek.badwords``
    compares equal to a copy restored from its pickle.
    """
    compare_extraction(greek.badwords.revision.datasources.matches,
                       BAD, OTHER)

    assert greek.badwords == pickle.loads(pickle.dumps(greek.badwords))


def test_informals():
    """Compare Greek informal-word extraction and check pickling."""
    matcher = greek.informals.revision.datasources.matches
    compare_extraction(matcher, INFORMAL, OTHER)

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(greek.informals))
    assert greek.informals == restored
github wikimedia / revscoring / tests / languages / test_finnish.py View on Github external
def test_stopwords():
    """Check Finnish stopword / non-stopword splitting and pickling."""
    # Pre-seed the revision text so solve() works from this sentence.
    text = ("Nordström on ette melko "
            "paljon huomiota")
    cache = {revision_oriented.revision.text: text}
    datasources = finnish.stopwords.revision.datasources

    found = solve(datasources.stopwords, cache=cache)
    assert found == ["on", "ette"]

    remaining = solve(datasources.non_stopwords, cache=cache)
    assert remaining == ['Nordström', 'melko', 'paljon', 'huomiota']

    # The feature set must compare equal to a copy restored from its pickle.
    restored = pickle.loads(pickle.dumps(finnish.stopwords))
    assert finnish.stopwords == restored