How to use the revscoring.datasources.revision_oriented.revision function in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / tests / languages / test_italian.py View on Github external
# Ordinary Italian sample texts passed to compare_extraction() alongside the
# BAD / INFORMAL word lists in the tests below.
# NOTE(review): presumably these are expected to yield no matches -- confirm
# against compare_extraction's contract.
OTHER = [
    """
    Le locomotive del gruppo 851 erano un gruppo di locomotive a vapore delle
    Ferrovie dello Stato.

    Furono progettate e fatte costruire dalla Rete Adriatica (RA) quali
    macchine per il servizio di linea. Nel 1905, insieme alle locomotive dei
    gruppi poi FS 290, 600 e 870 anch'esse ex RA, vennero inserite tra quelle
    che le FS reputarono meritevoli di ulteriori commesse nell'attesa del
    completamento del progetto dei nuovi gruppi idonei a fronteggiare lo
    sviluppo del traffico conseguente alla statalizzazione.
    """, "ha"
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Check Italian badword extraction and that the feature set pickles."""
    matches = italian.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(italian.badwords))
    assert italian.badwords == restored


def test_informals():
    """Check Italian informal-word extraction and pickle round-tripping."""
    matches = italian.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(italian.informals))
    assert italian.informals == restored
github wikimedia / revscoring / tests / languages / test_icelandic.py View on Github external
"jullur",
    "mellufær",
    "dræsur",
    "brjóst"
]

# Ordinary Icelandic sample text passed to compare_extraction() alongside the
# BAD / INFORMAL word lists in the tests below.
# NOTE(review): presumably this is expected to yield no matches -- confirm
# against compare_extraction's contract.
OTHER = [
    """
    Albert 1. var þriðji konungur Belgíu frá árinu 1909 til dauðadags.
    Þetta var viðburðaríkt tímabil í sögu Belgíu því í fyrri
    heimsstyrjöldinni (1914 – 1918) var mikill meirihluti landsins hernuminn
    af Þjóðverjum.
    """
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Check Icelandic badword extraction and pickle round-tripping."""
    matches = icelandic.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(icelandic.badwords))
    assert icelandic.badwords == restored


def test_informals():
    """Check Icelandic informal-word extraction and pickle round-tripping."""
    matches = icelandic.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(icelandic.informals))
    assert icelandic.informals == restored
github wikimedia / revscoring / tests / languages / test_croatian.py View on Github external
"ste",
    "sta",
]

# Ordinary Croatian sample text passed to compare_extraction() alongside the
# BAD / INFORMAL word lists in the tests below.
# NOTE(review): presumably this is expected to yield no matches -- confirm
# against compare_extraction's contract.
OTHER = [
    """
    Iako je često vezan i uz egzistencijalizam, Camus je odbijao tu
    povezanost. No, u drugu ruku, Camus u svom eseju Pobunjeni čovjek
    piše da se cijeli svoj život borio protiv filozofije nihilizma.
    Njegova religioznost također je bila čestom temom, a sam je u
    jednoj od svojih knjiga napisao: Ne vjerujem u boga "i" nisam
    ateist.
    """
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Check Croatian badword extraction and pickle round-tripping."""
    matches = croatian.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(croatian.badwords))
    assert croatian.badwords == restored


def test_informals():
    """Check Croatian informal-word extraction and pickle round-tripping."""
    matches = croatian.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(croatian.informals))
    assert croatian.informals == restored
github wikimedia / revscoring / tests / languages / test_bengali.py View on Github external
"পেত্নী",
]

# Ordinary Bengali sample text passed to compare_extraction() alongside the
# BAD / INFORMAL word lists in the tests below.
# NOTE(review): presumably this is expected to yield no matches -- confirm
# against compare_extraction's contract.
OTHER = [
    """
    সত্যজিৎ রায় একজন ভারতীয় চলচ্চিত্র নির্মাতা ও বিংশ শতাব্দীর অন্যতম শ্রেষ্ঠ
    চলচ্চিত্র পরিচালক। কলকাতা শহরে সাহিত্য ও শিল্পের জগতে খ্যাতনামা এক বাঙালি
    পরিবারে তাঁর জন্ম হয়। তিনি কলকাতার প্রেসিডেন্সি কলেজ ও শান্তিনিকেতনে
    রবীন্দ্রনাথ ঠাকুরের প্রতিষ্ঠিত বিশ্বভারতী বিশ্ববিদ্যালয়ে পড়াশোনা করেন।
    সত্যজিতের কর্মজীবন একজন বাণিজ্যিক চিত্রকর হিসেবে শুরু হলেও প্রথমে কলকাতায়
    ফরাসী চলচ্চিত্র নির্মাতা জঁ রনোয়ারের সাথে সাক্ষাৎ ও পরে লন্ডন শহরে সফররত
    অবস্থায় ইতালীয় নব্য বাস্তবতাবাদী ছবি লাদ্রি দি বিচিক্লেত্তে.
    """
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


@mark.nottravis
def test_badwords():
    """Check Bengali badword extraction and pickle round-tripping."""
    matches = bengali.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(bengali.badwords))
    assert bengali.badwords == restored


@mark.nottravis
def test_informals():
    """Check Bengali informal-word extraction and pickle round-tripping."""
    matches = bengali.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(bengali.informals))
    assert bengali.informals == restored
github wikimedia / revscoring / tests / languages / test_german.py View on Github external
# Ordinary German sample text passed to compare_extraction() alongside the
# BAD / INFORMAL word lists in the tests below.
# NOTE(review): presumably this is expected to yield no matches -- confirm
# against compare_extraction's contract.
OTHER = [
    """
    Das Kürzel Gulag (russisch Гулаг) bezeich­net das Netz von Arbeits­lagern
    in der Sowjet­union; im weiteren Sinn steht es für die Gesamt­heit des
    sowje­tischen Zwangs­arbeits­systems, das auch Spezial­gefäng­nisse,
    Zwangs­arbeits­pflichten ohne Haft sowie einige psychia­trische Kliniken
    als Haft­verbüßungs­orte umfasste. Von 1930 bis 1953 waren in den Lagern
    mindes­tens 18 Millionen Menschen inhaf­tiert. Mehr als 2,7 Millionen
    starben im Lager oder in der Verbannung. In den letzten Lebens­jahren
    Stalins erreichte der Gulag mit rund 2,5 Millionen Insassen seine größte
    quantitative Aus­dehnung.
    """
]

# Short alias for the revision text datasource.
r_text = revision_oriented.revision.text


def test_badwords():
    """Check German badword extraction and pickle round-tripping."""
    matches = german.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(german.badwords))
    assert german.badwords == restored


def test_informals():
    """Check German informal-word extraction and pickle round-tripping."""
    matches = german.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    # A pickle round trip must yield an object equal to the original.
    restored = pickle.loads(pickle.dumps(german.informals))
    assert german.informals == restored
github wikimedia / revscoring / tests / languages / test_latvian.py View on Github external
def test_dictionary():
    """
    Check that the Latvian dictionary datasources separate a sample text
    into dictionary and non-dictionary words, and that the dictionary
    feature set survives a pickle round trip.
    """
    # Seed the revision text directly so nothing has to be fetched.
    cache = {revision_oriented.revision.text:
             'novirze no ilggadējiem vidējiem  worngly.'}
    datasources = latvian.dictionary.revision.datasources

    known = solve(datasources.dict_words, cache=cache)
    assert known == ["novirze", "no", "ilggadējiem", "vidējiem"]

    unknown = solve(datasources.non_dict_words, cache=cache)
    assert unknown == ["worngly"]

    restored = pickle.loads(pickle.dumps(latvian.dictionary))
    assert latvian.dictionary == restored
github wikimedia / articlequality / articlequality / utilities / extract_from_text.py View on Github external
def extract_from_text(dependents, text, cache=None, context=None):
    """
    Solves a set of dependents against a raw text and returns their values.

    The text is injected into the cache as the value of
    `revision_oriented.revision.text` before solving.

    :Parameters:
        dependents : `list`( :class:`revscoring.Dependent` )
            A list of dependents to extract values for
        text : `str`
            A text from which to extract features
        cache : `dict`
            Optional pre-computed values.  When provided, this mapping is
            updated in place with the text entry; when omitted, a fresh
            dict is used.
        context : optional
            Passed through unchanged to `solve`.

    :Returns:
        A `list` of the solved values for `dependents`
    """
    if cache is None:
        cache = {}
    cache[revision_oriented.revision.text] = text

    solved = solve(dependents, cache=cache, context=context)
    return list(solved)
github diyiy / Wiki_Semantic_Intention / src / feat_src / wiki_edit_extractor.py View on Github external
MAX_NUM = 14
features = [	
		# *** user
		## ro.revision.user.id, # id = 0 for ip
		## ro.revision.user.info.groups,
		## ro.revision.user.registeration,
		## ro.revision.user.gender,
		# *** revision - overall 
		## ro.revision.byte_len,
		## ro.revision.comment,
		## ro.revision.minor,
		## ro.revision.timestamp,
		### ro.revision.text,
		# page 
		# ro.revision.page.id,
		ro.revision.page.namespace.id,	
		ro.revision.minor,
		ro.revision.byte_len, 
		
		## char features
		wikitext.revision.diff.uppercase_words_added,
		wikitext.revision.diff.chars_added,
		wikitext.revision.diff.chars_removed,
		wikitext.revision.diff.numeric_chars_added,
		wikitext.revision.diff.numeric_chars_removed,
		wikitext.revision.diff.whitespace_chars_added,
		wikitext.revision.diff.whitespace_chars_removed,
		wikitext.revision.diff.markup_chars_added,
		wikitext.revision.diff.markup_chars_removed,
		wikitext.revision.diff.cjk_chars_added,
		wikitext.revision.diff.cjk_chars_removed,
		wikitext.revision.diff.entity_chars_added,