How to use the revscoring.languages.features.RegexMatches class in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / revscoring / revscoring / languages / bosnian.py View on Github external
r"picka",
    r"pička",
    r"picke",
    r"pičke",
    r"picko",
    r"pičko",
    r"pizda",
    r"pizdo",
    r"pizdu",
    r"puškomet",
    r"shit",
    r"sranje",
    r"šupak"
]

# Construct the badwords feature from the `badword_regexes` list that closes
# just above; its feature name is `name + ".badwords"` (`name` is defined
# outside this excerpt).
badwords = RegexMatches(name + ".badwords", badword_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

informal_regexes = [
    r"boriću",
    r"bubaj",
    r"drzava",
    r"glup",
    r"haha",
    r"hahaha",
    r"hahahaha",
    r"hahahahaha",
    r"hihi",
    r"hihihi",
github wikimedia / revscoring / revscoring / languages / estonian.py View on Github external
r"nok(u|s)",
    r"pask",
    r"pede(d|kas|rast(id)?)?", "peded", "pedekas", "pederast", "pederastid",
    r"perse(s|sse)?",
    r"pigs?",
    r"pussy",
    r"putsi?",
    r"sitta?", r"sita(ne|junn|hunnik)?",
    r"st(oo+|u)pid",
    r"taun",
    r"türa",
    r"tussu?",
    r"vittu?", r"vitupea"
]

# Construct the badwords feature from the `badword_regexes` list that closes
# just above; its feature name is `name + ".badwords"` (`name` is defined
# outside this excerpt).
badwords = RegexMatches(name + ".badwords", badword_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

informal_regexes = [
    r"animal",
    r"(c|k)(oo+|ew)l(er|est)?",
    r"fakk+ing",
    r"g[aä]ngsta",
    r"ha(ha+)+",
    r"hmm+",
    r"ilge",
    r"ime(ge)?",
    r"jou",
    r"junni?",
github wikimedia / revscoring / revscoring / languages / german.py View on Github external
r"schwule",
    r"seid",
    r"spasti",
    r"stin(gt|k(e[rn]?|s?t)?)",
    r"swag",
    r"titten?",
    r"tobi",
    r"toll",
    r"unformatierten",
    r"vaginas",
    r"wisst",
    r"xd+?",
    r"xnxx"
]

informals = RegexMatches(name + ".informals", informal_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
github wikimedia / revscoring / revscoring / languages / galician.py View on Github external
# Construct the badwords feature with an explicit `wrapping` pair instead of
# relying on whatever RegexMatches does when `wrapping` is omitted.  The pair
# reads as: (start of string OR a character outside [\w\u0980-\u09FF],
#            end of string   OR a character outside [\w\u0980-\u09FF]).
# NOTE(review): U+0980-U+09FF is the Unicode Bengali block, which looks
# unrelated to Galician -- possibly carried over from another language
# module.  Confirm whether this custom wrapping is needed here at all.
badwords = RegexMatches(name + ".badwords", badword_regexes,
                        wrapping=(r'^|[^\w\u0980-\u09FF]',
                                  r'$|[^\w\u0980-\u09FF]'))
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

# Regex sources for informal, conversational tokens: two lengths of
# written-out laughter, then what read as a casual greeting ("ola") and a
# farewell ("adeus").  Order is kept as-is.
informal_regexes = [
    r"jajaja",
    r"jajajaja",
    r"ola",
    r"adeus",
]

informals = RegexMatches(name + ".informals", informal_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
github wikimedia / revscoring / revscoring / languages / russian.py View on Github external
r"трахал", r"трахала", r"трахали", r"трахалась",
    r"ублюдочные", r"ублюдочный",
    r"урод", r"уроды",
    r"фекальные",
    r"хер(ней|ня)",
    r"хуе(в(ый?)?|та)",
    r"ху[ий]",
    r"хуя(ми)?", r"хуями",
    r"хуйн([её]й|ю|я)",
    r"чмо",
    r"чурки",
    r"шлюха",
    r"щачло"
]

# Construct the badwords feature from the `badword_regexes` list that closes
# just above; its feature name is `name + ".badwords"` (`name` is defined
# outside this excerpt).
badwords = RegexMatches(name + ".badwords", badword_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

informal_regexes = [
    r"lol",
    r"арёл",
    r"безопасносте",
    r"блин",
    r"быдло",
    r"голактеко",
    r"доблестне",
    r"к[ао]роче?",
    r"лол",
    r"ля(ля)+",
github wikimedia / revscoring / revscoring / languages / czech.py View on Github external
r"uklizečky",
    r"ukradnou",
    r"vam",
    r"vám",
    r"vás",
    r"velkej",
    r"velky",
    r"vložit",
    r"vložte",
    r"vytrznik",
    r"zdar",
    r"znecistuje",
    r"znečistil"
]

informals = RegexMatches(name + ".informals", informal_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
github wikimedia / revscoring / revscoring / languages / galician.py View on Github external
r'estúpida',
    r'estúpidas',
    r'chúpame',
    r'cerdo',
    r'cerdos',
    r'cerda',
    r'cerdas',
    r'imbecil',
    r'imbécil',
    r'cagada',
    r'mamada',
    r'concha',
    r'gilipollas',
]

# Construct the badwords feature with an explicit `wrapping` pair instead of
# relying on whatever RegexMatches does when `wrapping` is omitted.  The pair
# reads as: (start of string OR a character outside [\w\u0980-\u09FF],
#            end of string   OR a character outside [\w\u0980-\u09FF]).
# NOTE(review): U+0980-U+09FF is the Unicode Bengali block, which looks
# unrelated to Galician -- possibly carried over from another language
# module.  Confirm whether this custom wrapping is needed here at all.
badwords = RegexMatches(name + ".badwords", badword_regexes,
                        wrapping=(r'^|[^\w\u0980-\u09FF]',
                                  r'$|[^\w\u0980-\u09FF]'))
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

# Regex sources for informal, conversational tokens: two lengths of
# written-out laughter, then what read as a casual greeting ("ola") and a
# farewell ("adeus").  Order is kept as-is.
informal_regexes = [
    r"jajaja",
    r"jajajaja",
    r"ola",
    r"adeus",
]

informals = RegexMatches(name + ".informals", informal_regexes)
"""
github wikimedia / revscoring / revscoring / languages / english.py View on Github external
r"soo+?",
    r"stink(s|y)?",
    r"s+?t+?[uo]+?p+?i+?d+?\w*",
    r"suck(s|ing|er)?", r"sux",
    r"shouldn'?t",
    r"test +edit", r"t+?u+?r+?d+?s?\w*",
    r"wasn'?t",
    r"w+[oua]+t+", r"wtf\w*", r"wh?[ua]+?t?[sz]+[ua]+p", r"s+?u+?p+?",
    r"wu+?z+?",
    r"won'?t",
    r"w+?o+?o+?f+?",
    r"ya'?ll", r"y+?a+?y+?", r"y+?e+?a+?h?", r"you('?(ve|re|ll))?",
    r"y+?o+?l+?o+?"
]

# Construct the informals feature from the `informal_regexes` list that
# closes just above; its feature name is `name + ".informals"` (`name` is
# defined outside this excerpt).
informals = RegexMatches(name + ".informals", informal_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
informal word detecting regexes.
"""

words_to_watch_regexes = [
    # Puffery
    r'legendary', r'best', r'great', r'acclaimed', r'iconic',
    r'visionary', r'outstanding', r'leading', r'celebrated',
    r'award[- ]?winning',
    r'landmark', r'cutting[- ]?edge', r'innovative', r'extraordinary',
    r'brilliant', r'hit', r'famous', r'renowned', r'remarkable',
    r'prestigious',
    r'world[- ]?class', r'respected', r'notable', r'virtuoso', r'honorable',
    r'awesome', r'unique', r'pioneering',
    # Contentious labels (-gate removed)
github wikimedia / revscoring / revscoring / languages / dutch.py View on Github external
"spaghettivreter", "pastavreter",  # perj. for people of Italian descent
    "loempiavouwer",  # "spring roll folder" people of Vietnamese descent
    "spleetoog",  # "slit eye" term for people of Asian descent
    "tuig",  # "scum"
    "zandneger",  # "sand negro" an ethnic slur for Middle Eastern people

    # Religion
    "gadverdamme", "godverdomme", "gadver", "getverderrie",   # "god damn"
    "getver", "verdomme", "verdamme", "verdorie",  # "god damn" continued
    "godskolere",  # "god fury"
    "graftak",  # "grave branch" old, moody, and/or cranky person.
    "jezus christus", "tjezus", "jeetje", "jezus mina", "jezus",  # Jesus
    "jesses", "jasses", "harrejasses", "here jezus",  # Jesus continued
]

# Construct the badwords feature from the `badword_regexes` list that closes
# just above; its feature name is `name + ".badwords"` (`name` is defined
# outside this excerpt).
badwords = RegexMatches(name + ".badwords", badword_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

informal_regexes = [
    r"aap(jes)?",
    r"banaan",
    r"bent",
    r"boe(it)?",
    r"doei",
    r"dombo",
    r"domme",
    r"eigelijk",
    r"fransoos",  # Fransoos is a lightly derogatory term for French people.
    r"godverdomme",
github wikimedia / revscoring / revscoring / languages / swedish.py View on Github external
r"äckligt",
    r"älskar",
    r"våldtagen",
    r"våldtog",
    r"sämst",
    r"slicka",
    r"snygging",
    r"sperma",
    r"svejsan",
    r"särbarn",
    r"snygg",
    r"snygga",
    r"snyggast",
]

# Construct the badwords feature from the `badword_regexes` list that closes
# just above; its feature name is `name + ".badwords"` (`name` is defined
# outside this excerpt).
badwords = RegexMatches(name + ".badwords", badword_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

informal_regexes = [
    r"adda",
    r"asså",
    r"awesome",
    r"btw",
    r"cool",
    r"coola",
    r"coolaste",
    r"coolt",
    r"din",
    r"ftw",