Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Sample Italian prose plus the short word "ha"; passed as the OTHER argument
# to compare_extraction by the tests below.
# NOTE(review): presumably these are texts expected to yield no matches --
# confirm against compare_extraction.
OTHER = [
"""
Le locomotive del gruppo 851 erano un gruppo di locomotive a vapore delle
Ferrovie dello Stato.
Furono progettate e fatte costruire dalla Rete Adriatica (RA) quali
macchine per il servizio di linea. Nel 1905, insieme alle locomotive dei
gruppi poi FS 290, 600 e 870 anch'esse ex RA, vennero inserite tra quelle
che le FS reputarono meritevoli di ulteriori commesse nell'attesa del
completamento del progetto dei nuovi gruppi idonei a fronteggiare lo
sviluppo del traffico conseguente alla statalizzazione.
""", "ha"
]
# Short alias for revision_oriented.revision.text; not referenced in the
# visible part of this excerpt.
r_text = revision_oriented.revision.text
def test_badwords():
    """
    Run compare_extraction on the Italian badwords match datasource with the
    BAD and OTHER samples, then check that `italian.badwords` compares equal
    to its own pickle round trip.
    """
    matches = italian.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    restored = pickle.loads(pickle.dumps(italian.badwords))
    assert italian.badwords == restored
def test_informals():
    """
    Run compare_extraction on the Italian informals match datasource with the
    INFORMAL and OTHER samples, then check that `italian.informals` compares
    equal to its own pickle round trip.
    """
    matches = italian.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    restored = pickle.loads(pickle.dumps(italian.informals))
    assert italian.informals == restored
"jullur",
"mellufær",
"dræsur",
"brjóst"
]
# Sample Icelandic prose; passed as the OTHER argument to compare_extraction
# by the tests below.
# NOTE(review): presumably text expected to yield no matches -- confirm
# against compare_extraction.
OTHER = [
"""
Albert 1. var þriðji konungur Belgíu frá árinu 1909 til dauðadags.
Þetta var viðburðaríkt tímabil í sögu Belgíu því í fyrri
heimsstyrjöldinni (1914 – 1918) var mikill meirihluti landsins hernuminn
af Þjóðverjum.
"""
]
# Short alias for revision_oriented.revision.text; not referenced in the
# visible part of this excerpt.
r_text = revision_oriented.revision.text
def test_badwords():
    """
    Run compare_extraction on the Icelandic badwords match datasource with
    the BAD and OTHER samples, then check that `icelandic.badwords` compares
    equal to its own pickle round trip.
    """
    matches = icelandic.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    restored = pickle.loads(pickle.dumps(icelandic.badwords))
    assert icelandic.badwords == restored
def test_informals():
    """
    Run compare_extraction on the Icelandic informals match datasource with
    the INFORMAL and OTHER samples, then check that `icelandic.informals`
    compares equal to its own pickle round trip.
    """
    matches = icelandic.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    restored = pickle.loads(pickle.dumps(icelandic.informals))
    assert icelandic.informals == restored
"ste",
"sta",
]
# Sample Croatian prose; passed as the OTHER argument to compare_extraction
# by the tests below.
# NOTE(review): presumably text expected to yield no matches -- confirm
# against compare_extraction.
OTHER = [
"""
Iako je često vezan i uz egzistencijalizam, Camus je odbijao tu
povezanost. No, u drugu ruku, Camus u svom eseju Pobunjeni čovjek
piše da se cijeli svoj život borio protiv filozofije nihilizma.
Njegova religioznost također je bila čestom temom, a sam je u
jednoj od svojih knjiga napisao: Ne vjerujem u boga "i" nisam
ateist.
"""
]
# Short alias for revision_oriented.revision.text; not referenced in the
# visible part of this excerpt.
r_text = revision_oriented.revision.text
def test_badwords():
    """
    Run compare_extraction on the Croatian badwords match datasource with the
    BAD and OTHER samples, then check that `croatian.badwords` compares equal
    to its own pickle round trip.
    """
    matches = croatian.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    restored = pickle.loads(pickle.dumps(croatian.badwords))
    assert croatian.badwords == restored
def test_informals():
    """
    Run compare_extraction on the Croatian informals match datasource with
    the INFORMAL and OTHER samples, then check that `croatian.informals`
    compares equal to its own pickle round trip.
    """
    matches = croatian.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    restored = pickle.loads(pickle.dumps(croatian.informals))
    assert croatian.informals == restored
"পেত্নী",
]
# Sample Bengali prose; passed as the OTHER argument to compare_extraction
# by the tests below.
# NOTE(review): presumably text expected to yield no matches -- confirm
# against compare_extraction.
OTHER = [
"""
সত্যজিৎ রায় একজন ভারতীয় চলচ্চিত্র নির্মাতা ও বিংশ শতাব্দীর অন্যতম শ্রেষ্ঠ
চলচ্চিত্র পরিচালক। কলকাতা শহরে সাহিত্য ও শিল্পের জগতে খ্যাতনামা এক বাঙালি
পরিবারে তাঁর জন্ম হয়। তিনি কলকাতার প্রেসিডেন্সি কলেজ ও শান্তিনিকেতনে
রবীন্দ্রনাথ ঠাকুরের প্রতিষ্ঠিত বিশ্বভারতী বিশ্ববিদ্যালয়ে পড়াশোনা করেন।
সত্যজিতের কর্মজীবন একজন বাণিজ্যিক চিত্রকর হিসেবে শুরু হলেও প্রথমে কলকাতায়
ফরাসী চলচ্চিত্র নির্মাতা জঁ রনোয়ারের সাথে সাক্ষাৎ ও পরে লন্ডন শহরে সফররত
অবস্থায় ইতালীয় নব্য বাস্তবতাবাদী ছবি লাদ্রি দি বিচিক্লেত্তে.
"""
]
# Short alias for revision_oriented.revision.text; not referenced in the
# visible part of this excerpt.
r_text = revision_oriented.revision.text
@mark.nottravis
def test_badwords():
    """
    Run compare_extraction on the Bengali badwords match datasource with the
    BAD and OTHER samples, then check that `bengali.badwords` compares equal
    to its own pickle round trip.
    """
    matches = bengali.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    restored = pickle.loads(pickle.dumps(bengali.badwords))
    assert bengali.badwords == restored
@mark.nottravis
def test_informals():
    """
    Run compare_extraction on the Bengali informals match datasource with the
    INFORMAL and OTHER samples, then check that `bengali.informals` compares
    equal to its own pickle round trip.
    """
    matches = bengali.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    restored = pickle.loads(pickle.dumps(bengali.informals))
    assert bengali.informals == restored
# Sample German prose; passed as the OTHER argument to compare_extraction by
# the tests below.
# NOTE(review): presumably text expected to yield no matches -- confirm
# against compare_extraction.
OTHER = [
"""
Das Kürzel Gulag (russisch Гулаг) bezeichnet das Netz von Arbeitslagern
in der Sowjetunion; im weiteren Sinn steht es für die Gesamtheit des
sowjetischen Zwangsarbeitssystems, das auch Spezialgefängnisse,
Zwangsarbeitspflichten ohne Haft sowie einige psychiatrische Kliniken
als Haftverbüßungsorte umfasste. Von 1930 bis 1953 waren in den Lagern
mindestens 18 Millionen Menschen inhaftiert. Mehr als 2,7 Millionen
starben im Lager oder in der Verbannung. In den letzten Lebensjahren
Stalins erreichte der Gulag mit rund 2,5 Millionen Insassen seine größte
quantitative Ausdehnung.
"""
]
# Short alias for revision_oriented.revision.text; not referenced in the
# visible part of this excerpt.
r_text = revision_oriented.revision.text
def test_badwords():
    """
    Run compare_extraction on the German badwords match datasource with the
    BAD and OTHER samples, then check that `german.badwords` compares equal
    to its own pickle round trip.
    """
    matches = german.badwords.revision.datasources.matches
    compare_extraction(matches, BAD, OTHER)

    restored = pickle.loads(pickle.dumps(german.badwords))
    assert german.badwords == restored
def test_informals():
    """
    Run compare_extraction on the German informals match datasource with the
    INFORMAL and OTHER samples, then check that `german.informals` compares
    equal to its own pickle round trip.
    """
    matches = german.informals.revision.datasources.matches
    compare_extraction(matches, INFORMAL, OTHER)

    restored = pickle.loads(pickle.dumps(german.informals))
    assert german.informals == restored
def test_dictionary():
    """
    Solve the Latvian dictionary datasources against a short sample text and
    check which words are reported as dictionary and non-dictionary words,
    then check that `latvian.dictionary` compares equal to its own pickle
    round trip.
    """
    # The last word is deliberately misspelled so that it is the only one
    # expected in non_dict_words.
    cache = {
        revision_oriented.revision.text:
            'novirze no ilggadējiem vidējiem worngly.',
    }

    known = solve(latvian.dictionary.revision.datasources.dict_words,
                  cache=cache)
    assert known == ["novirze", "no", "ilggadējiem", "vidējiem"]

    unknown = solve(latvian.dictionary.revision.datasources.non_dict_words,
                    cache=cache)
    assert unknown == ["worngly"]

    restored = pickle.loads(pickle.dumps(latvian.dictionary))
    assert latvian.dictionary == restored
def extract_from_text(dependents, text, cache=None, context=None):
    """
    Solves a set of dependents against a text and returns their values.

    The text is stored in the cache under ``revision_oriented.revision.text``
    before solving, so a cache supplied by the caller is modified in place
    and any entry already stored under that key is replaced.

    :Parameters:
        dependents : `list`( :class:`revscoring.Dependent` )
            A list of dependents to extract values for
        text : `str`
            A text from which to extract features
        cache : `dict`
            Optional cache handed to ``solve``; a new empty dict is used
            when this is `None`
        context
            Optional context, passed through to ``solve`` unchanged

    :Returns:
        A list of extracted feature values
    """
    if cache is None:
        cache = {}
    cache[revision_oriented.revision.text] = text

    values = solve(dependents, cache=cache, context=context)
    return list(values)
MAX_NUM = 14
features = [
# *** user
## ro.revision.user.id, # id = 0 for ip
## ro.revision.user.info.groups,
## ro.revision.user.registeration,
## ro.revision.user.gender,
# *** revision - overall
## ro.revision.byte_len,
## ro.revision.comment,
## ro.revision.minor,
## ro.revision.timestamp,
### ro.revision.text,
# page
# ro.revision.page.id,
ro.revision.page.namespace.id,
ro.revision.minor,
ro.revision.byte_len,
## char features
wikitext.revision.diff.uppercase_words_added,
wikitext.revision.diff.chars_added,
wikitext.revision.diff.chars_removed,
wikitext.revision.diff.numeric_chars_added,
wikitext.revision.diff.numeric_chars_removed,
wikitext.revision.diff.whitespace_chars_added,
wikitext.revision.diff.whitespace_chars_removed,
wikitext.revision.diff.markup_chars_added,
wikitext.revision.diff.markup_chars_removed,
wikitext.revision.diff.cjk_chars_added,
wikitext.revision.diff.cjk_chars_removed,
wikitext.revision.diff.entity_chars_added,