Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
Turkish Wikipedia
+++++++++++++++++
"""
from revscoring.features import wikitext
from revscoring.features.modifiers import max, sub
from . import wikipedia
# Citation templates: names matching "Kaynak", or anything followed by
# " kaynağı" / "_kaynağı".
cite_templates = wikitext.revision.template_names_matching(
    r"Kaynak|.*[ _]kaynağı",
    name="trwiki.revision.cite_templates",
)

# Citation templates relative to <ref> tags.  The denominator is passed
# through max(..., 1) so a revision without ref tags does not divide by zero.
proportion_of_templated_references = (
    cite_templates / max(wikitext.revision.ref_tags, 1)
)

# Ref tags not accounted for by a citation template, floored at 0.
non_templated_references = max(
    wikitext.revision.ref_tags - cite_templates, 0
)

# Every template minus the citation templates.
non_cite_templates = sub(
    wikitext.revision.templates,
    cite_templates,
    name="trwiki.revision.non_cite_templates",
)

# Infobox templates: names containing " bilgi kutusu" (space or underscore
# separated) after an arbitrary prefix.
infobox_templates = wikitext.revision.template_names_matching(
    r".*[ _]bilgi[ _]kutusu",
    name="trwiki.revision.infobox_templates",
)
# Copied (2015-10-29) from:
# https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Citez_vos_sources#R.C3.A9clamation_et_contestation_de_sources
cn_templates = wikitext.revision.template_names_matching(
r"Référence[ _]obsolète", r"À[ _]sourcer",
r"Sources[ _]secondaires", r"BPV[ _]à[ _]sourcer"]
# Each LVLn list is OR-joined into a single pattern for template-name matching.
# NOTE(review): "lvl3/4/5" presumably rank "citation needed" severity -- confirm.
lvl3_cn_templates = wikitext.revision.template_names_matching(
    "|".join(LVL3_CN_TEMPLATES),
    name="frwiki.revision.lvl3_cn_templates",
)

LVL4_CN_TEMPLATES = [
    r"À[ _]prouver",
    r"Faut[ _]sourcer",
]
lvl4_cn_templates = wikitext.revision.template_names_matching(
    "|".join(LVL4_CN_TEMPLATES),
    name="frwiki.revision.lvl4_cn_templates",
)

LVL5_CN_TEMPLATES = [
    r"À[ _]vérifier",
    r"Vérifiabilité",
]
lvl5_cn_templates = wikitext.revision.template_names_matching(
    "|".join(LVL5_CN_TEMPLATES),
    name="frwiki.revision.lvl5_cn_templates",
)
# "Article principal" / "Article détaillé" templates (space or underscore).
main_article_templates = wikitext.revision.template_names_matching(
    r"Article[ _](principal|détaillé)",
    name="frwiki.main_article_templates",
)

# Templates whose name matches "date".
date_templates = wikitext.revision.template_names_matching(
    r"date",
    name="frwiki.revision.date_templates",
)
# Links
# The namespace alternatives are grouped so that the trailing colon applies
# to every prefix.  Without the group, "Category|Catégorie\:" parses as
# "Category" OR "Catégorie:", so a title that merely begins with "Category"
# (no namespace colon) could be picked up as a category link.
category_links = wikitext.revision.wikilink_titles_matching(
    r"(Category|Catégorie)\:",
    name="frwiki.revision.category_links",
)

# Same grouping for the file/image namespaces: "File:", "Image:", "Fichier:".
image_links = wikitext.revision.wikilink_titles_matching(
    r"(File|Image|Fichier)\:",
    name="frwiki.revision.image_links",
)
local_wiki = [
image_links,
image_links / max(wikitext.revision.content_chars, 1),
category_links,
"""
from revscoring.datasources.meta import filters, mappers
from revscoring.features import wikitext
from revscoring.features.meta import aggregators
from revscoring.features.modifiers import log, max
from revscoring.features.wikitext.datasources import Revision
from revscoring.languages import basque, english, spanish
from . import wikipedia
# Templates
# Infobox templates: names ending in "infotaula automatikoa", optionally
# preceded by word characters / whitespace.
infobox_templates = wikitext.revision.template_names_matching(
    r"[\w\s_]*infotaula[ _]automatikoa$",
    name="euwiki.revision.infobox_templates",
)

# "erref behar" templates.
cn_templates = wikitext.revision.template_names_matching(
    r"erref[ _]behar",
    name="euwiki.revision.cn_templates",
)

# Links
# Excluding category_links based on https://phabricator.wikimedia.org/T240467
# category_links = wikitext.revision.wikilink_titles_matching(
# r"(Kategoria|Category)\:", name="euwiki.revision.category_links")

# Wikilinks into the File / Image / Fitxategi namespaces.
image_links = wikitext.revision.wikilink_titles_matching(
    r"(File|Image|Fitxategi)\:",
    name="euwiki.revision.image_links",
)

# References
# Revision wrapper named "euwiki.revision.revision", built on the stock
# wikitext revision datasources.
revision = Revision("euwiki.revision.revision", wikitext.revision.datasources)
paragraphs = mappers.map(
str, revision.paragraphs_sentences_and_whitespace,
"|".join(MAIN_TEMPLATES), name="ptwiki.main_article_templates")
# Template-name patterns treated as citations; OR-joined below.
CITE_TEMPLATES = [
    r"Cite",
    r"Citar",
    r"Harvard[_ ]citation[_ ]no[_ ]brackets",
    r"harvnb",
    r"Harvard[_ ]citation",
    r"harv",
    r"harvtxt",
    r"Harvcoltxt",
    r"Harvcol",
    r"Harvcolnb",
    r"Harvard citations",
    r"harvs",
    r"Harvp",
]
cite_templates = wikitext.revision.template_names_matching(
    "|".join(CITE_TEMPLATES),
    name="ptwiki.revision.cite_templates",
)

# Templates whose name matches "sfn".
shortened_footnote_templates = wikitext.revision.template_names_matching(
    r"sfn",
    name="ptwiki.revision.shortened_footnote_templates",
)
# Shortened-footnote templates are added to both the reference total and
# the citation-template total.
all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags
all_cite_templates = cite_templates + shortened_footnote_templates

# Citation templates relative to references; the denominator is passed
# through max(..., 1) to avoid dividing by zero.
proportion_of_templated_references = (
    all_cite_templates / max(all_ref_tags, 1)
)

# References not accounted for by a citation template, floored at 0.
non_templated_references = max(all_ref_tags - all_cite_templates, 0)

# Every template minus the citation templates (sfn included).
non_cite_templates = sub(
    wikitext.revision.templates,
    all_cite_templates,
    name="ptwiki.revision.non_cite_templates",
)
# Links
# Wikilinks into the Category / Categoria namespace; whitespace is allowed
# before the colon.
category_links = wikitext.revision.wikilink_titles_matching(
    r"(Category|Categoria)\s*\:",
    name="ptwiki.revision.category_links",
)
image_links = wikitext.revision.wikilink_titles_matching(
# Ref tags not accounted for by a citation template, floored at 0.
non_templated_references = max(
    wikitext.revision.ref_tags - cite_templates, 0
)

# Every template minus the citation templates.
non_cite_templates = sub(
    wikitext.revision.templates,
    cite_templates,
    name="trwiki.revision.non_cite_templates",
)

# Infobox templates: names containing " bilgi kutusu" (space or underscore
# separated) after an arbitrary prefix.
infobox_templates = wikitext.revision.template_names_matching(
    r".*[ _]bilgi[ _]kutusu",
    name="trwiki.revision.infobox_templates",
)
# Copied (2015-10-29) from:
# https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Citez_vos_sources#R.C3.A9clamation_et_contestation_de_sources
# Templates matching "Kaynak belirt", "Olgu", "Fact" or "Delil".
# NOTE(review): the feature name says "lvl1_cn_templates" while the variable
# is plain cn_templates -- confirm this is intended.
cn_templates = wikitext.revision.template_names_matching(
    r"Kaynak[ _]belirt|Olgu|Fact|Delil",
    name="trwiki.revision.lvl1_cn_templates",
)

# Templates matching "Ana" or "Anamadde".
main_article_templates = wikitext.revision.template_names_matching(
    r"Ana|Anamadde",
    name="trwiki.main_article_templates",
)
# Links
# The namespace alternatives are grouped so that the trailing colon applies
# to every prefix.  Without the group, "Category|Kategori\:" parses as
# "Category" OR "Kategori:", so a title that merely begins with "Category"
# (no namespace colon) could be picked up as a category link.
category_links = wikitext.revision.wikilink_titles_matching(
    r"(Category|Kategori)\:",
    name="trwiki.revision.category_links",
)

# Same grouping for the file/image namespaces: "File:", "Image:", "Resim:".
# The feature name uses the "trwiki." prefix like every other feature in this
# module (it was previously misspelled "rrwiki.").
image_links = wikitext.revision.wikilink_titles_matching(
    r"(File|Image|Resim)\:",
    name="trwiki.revision.image_links",
)
local_wiki = [
image_links,
image_links / max(wikitext.revision.content_chars, 1),
category_links,
category_links / max(wikitext.revision.content_chars, 1),
cite_templates,
cite_templates / max(wikitext.revision.content_chars, 1),
from revscoring.features.modifiers import sub
from revscoring.languages import english
from . import mediawiki, wikipedia, wikitext
# English-Wikipedia-specific features.
local_wiki = [
    # Edit comment matches one of: copy, edit, npov, wp:el / wpel.
    revision_oriented.revision.comment_matches(
        r"copy|edit|npov|wp:?el",
        name="enwiki.revision.comment.has_known_word",
    ),
    # Edit comment matches the "[[WP:AES|←]]" link.
    revision_oriented.revision.comment_matches(
        r"\[\[WP:AES\|←\]\]",
        name="enwiki.revision.comment.is_aes",
    ),
    # Templates starting with "cite" in this revision minus those in the
    # parent revision.
    sub(
        wikitext_features.revision.template_names_matching(r"^cite"),
        wikitext_features.revision.parent.template_names_matching(r"^cite"),
        name="enwiki.revision.diff.cite_templates_added",
    ),
]
# English badword diff features: raw match deltas first, then the
# proportional ("prop") variants, each as sum / increase / decrease.
badwords = [
    english.badwords.revision.diff.match_delta_sum,
    english.badwords.revision.diff.match_delta_increase,
    english.badwords.revision.diff.match_delta_decrease,
    english.badwords.revision.diff.match_prop_delta_sum,
    english.badwords.revision.diff.match_prop_delta_increase,
    english.badwords.revision.diff.match_prop_delta_decrease,
]
informals = [
english.informals.revision.diff.match_delta_sum,
"""
Basque Wikipedia
++++++++++++++++
"""
from revscoring.datasources.meta import filters, mappers
from revscoring.features import wikitext
from revscoring.features.meta import aggregators
from revscoring.features.modifiers import log, max
from revscoring.features.wikitext.datasources import Revision
from revscoring.languages import basque, english, spanish
from . import wikipedia
# Templates
# Infobox templates: names ending in "infotaula automatikoa", optionally
# preceded by word characters / whitespace.
infobox_templates = wikitext.revision.template_names_matching(
    r"[\w\s_]*infotaula[ _]automatikoa$",
    name="euwiki.revision.infobox_templates",
)

# "erref behar" templates.
cn_templates = wikitext.revision.template_names_matching(
    r"erref[ _]behar",
    name="euwiki.revision.cn_templates",
)

# Links
# Excluding category_links based on https://phabricator.wikimedia.org/T240467
# category_links = wikitext.revision.wikilink_titles_matching(
# r"(Kategoria|Category)\:", name="euwiki.revision.category_links")

# Wikilinks into the File / Image / Fitxategi namespaces.
image_links = wikitext.revision.wikilink_titles_matching(
    r"(File|Image|Fitxategi)\:",
    name="euwiki.revision.image_links",
)
# References
revision = Revision(
"euwiki.revision.revision",
wikitext.revision.datasources,
"""
Persian Wikipedia
+++++++++++++++++
"""
from revscoring.features import wikitext
from revscoring.features.meta import aggregators
from revscoring.features.modifiers import max, sub, log
from revscoring.features.wikitext.datasources import Revision
from revscoring.datasources.meta import mappers, filters
from . import wikipedia
# Templates
# Infobox templates: English "infobox" or Persian "جعبه".
infobox_templates = wikitext.revision.template_names_matching(
    r"infobox|جعبه",
    name="fawiki.revision.infobox_templates",
)

# Template-name patterns OR-joined into the cn_templates pattern below.
CN_TEMPLATES = [
    r"Citation[_ ]needed",
    r"Cn",
    r"Fact",
    r"مدرک",
]
cn_templates = wikitext.revision.template_names_matching(
    "|".join(CN_TEMPLATES),
    name="fawiki.revision.cn_templates",
)
who_templates = wikitext.revision.template_names_matching(
"Who|چه کسی|چهکسی", name="fawiki.revision.who_templates")
# Templates matching English "Main" or Persian "اصلی".
main_article_templates = wikitext.revision.template_names_matching(
    "Main|اصلی",
    name="fawiki.main_article_templates",
)

# Citation templates: English "cite" or Persian "یادکرد".
cite_templates = wikitext.revision.template_names_matching(
    r"cite|یادکرد",
    name="fawiki.revision.cite_templates",
)
proportion_of_templated_references = \