How to use the revscoring.features.wikitext module in revscoring

To help you get started, we’ve selected a few revscoring examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wikimedia / articlequality / articlequality / feature_lists / svwiki.py View on Github external
"""
Swedish Wikipedia
+++++++++++++++++
"""

from revscoring.features import wikitext
from revscoring.features.modifiers import max
from revscoring.languages import swedish

from . import wikipedia

# Feature built from template names matching the Swedish "citation needed"
# markers ("Källa behövs", with a space or underscore, or "Kb").
cn_templates = wikitext.revision.template_names_matching(
    r"Källa[ _]behövs|Kb",
    name="svwiki.revision.cn_templates")

# Links
# The namespace alternatives are grouped so the trailing colon applies to
# every one of them.  Ungrouped, `A|B\:` parses as `A` or `B\:`, which lets
# the bare words "Category", "File" and "Image" match without a namespace
# colon.
# NOTE(review): this tightens the values these features produce; confirm
# whether models trained on the old values need retraining.
category_links = wikitext.revision.wikilink_titles_matching(
    r"(?:Category|Kategori)\:", name="revision.category_links")
image_links = wikitext.revision.wikilink_titles_matching(
    r"(?:File|Image|Fil)\:", name="revision.image_links")

local_wiki = [
    image_links,
    image_links / max(wikitext.revision.content_chars, 1),
    category_links,
    category_links / max(wikitext.revision.content_chars, 1),
    swedish.dictionary.revision.dict_words,
    swedish.dictionary.revision.dict_words / max(wikitext.revision.words, 1),
github wikimedia / articlequality / articlequality / feature_lists / enwiki.py View on Github external
English Wikipedia
+++++++++++++++++
"""
import re

from revscoring.datasources.meta import filters, mappers
from revscoring.features import wikitext
from revscoring import Feature
from revscoring.features.meta import aggregators
from revscoring.features.modifiers import log, max, sub
from revscoring.languages import english

from . import wikipedia

# Templates
# Each feature below is built from a regex over template names and is given
# an explicit feature name.
infobox_templates = wikitext.revision.template_names_matching(
    r"infobox", name="enwiki.revision.infobox_templates")
# Name patterns for "citation needed" style templates; joined with "|" into
# a single alternation when the feature is built.
CN_TEMPLATES = [
    r"Citation[_ ]needed",
    r"Cn",
    r"Fact"
]
cn_templates = wikitext.revision.template_names_matching(
    "|".join(CN_TEMPLATES), name="enwiki.revision.cn_templates")
who_templates = wikitext.revision.template_names_matching(
    "Who", name="enwiki.revision.who_templates")
# NOTE(review): this feature name lacks the ".revision." segment used by the
# sibling features above -- confirm whether that is intentional before
# changing it, since the name identifies the feature.
main_article_templates = wikitext.revision.template_names_matching(
    "Main", name="enwiki.main_article_templates")
CITE_TEMPLATES = [
    r"Cite",
    r"Harvard[_ ]citation[_ ]no[_ ]brackets", r"harvnb",
    r"Harvard citation", r"harv",
github wikimedia / articlequality / articlequality / feature_lists / wikipedia.py View on Github external
from revscoring.features import wikitext, modifiers

def _per_content_char(feature):
    """Return `feature` divided by `modifiers.max(content_chars, 1)`."""
    return feature / modifiers.max(wikitext.revision.content_chars, 1)


# Baseline article features: every raw count after the two character counts
# is immediately followed by the same count scaled by content length.
article = [
    wikitext.revision.chars,
    wikitext.revision.content_chars,
    wikitext.revision.ref_tags,
    _per_content_char(wikitext.revision.ref_tags),
    wikitext.revision.wikilinks,
    _per_content_char(wikitext.revision.wikilinks),
    wikitext.revision.external_links,
    _per_content_char(wikitext.revision.external_links),
    wikitext.revision.headings_by_level(2),
    _per_content_char(wikitext.revision.headings_by_level(2)),
    wikitext.revision.headings_by_level(3),
    _per_content_char(wikitext.revision.headings_by_level(3)),
]
github diyiy / Wiki_Semantic_Intention / src / feat_src / wiki_edit_extractor.py View on Github external
wikitext.revision.diff.url_delta_sum,
		wikitext.revision.diff.url_delta_increase,
		wikitext.revision.diff.url_delta_decrease,
		wikitext.revision.diff.url_prop_delta_sum,
		wikitext.revision.diff.url_prop_delta_increase,
		wikitext.revision.diff.url_prop_delta_decrease,
		wikitext.revision.diff.word_delta_sum,
		wikitext.revision.diff.word_delta_increase,
		wikitext.revision.diff.word_delta_decrease,
		wikitext.revision.diff.word_prop_delta_sum,
		wikitext.revision.diff.word_prop_delta_increase,
		wikitext.revision.diff.word_prop_delta_decrease,
		wikitext.revision.diff.uppercase_word_delta_sum,
		wikitext.revision.diff.uppercase_word_delta_increase,
		wikitext.revision.diff.uppercase_word_delta_decrease,
		wikitext.revision.diff.uppercase_word_prop_delta_sum,
		wikitext.revision.diff.uppercase_word_prop_delta_increase,
		wikitext.revision.diff.uppercase_word_prop_delta_decrease,
		wikitext.revision.diff.punctuation_delta_sum,
		wikitext.revision.diff.punctuation_delta_increase,
		wikitext.revision.diff.punctuation_delta_decrease,
		wikitext.revision.diff.punctuation_prop_delta_sum,
		wikitext.revision.diff.punctuation_prop_delta_increase,
		wikitext.revision.diff.punctuation_prop_delta_decrease,
		wikitext.revision.diff.break_delta_sum,
		wikitext.revision.diff.break_delta_increase,
		wikitext.revision.diff.break_delta_decrease,
		wikitext.revision.diff.break_prop_delta_sum,
		wikitext.revision.diff.break_prop_delta_increase,
		wikitext.revision.diff.break_prop_delta_decrease,
		## token edit features
		wikitext.revision.diff.segments_added,
github wikimedia / editquality / editquality / feature_lists / enwiki.py View on Github external
from revscoring.languages import english

from . import mediawiki, wikipedia, wikitext

# English-Wikipedia-specific features.
# `revision_oriented`, `wikitext_features` and `sub` are expected to be
# imported earlier in the module (not visible in this excerpt).
local_wiki = [
    # Edit comment matches one of a few known words / policy shortcuts.
    revision_oriented.revision.comment_matches(
        r"copy|edit|npov|wp:?el",
        name="enwiki.revision.comment.has_known_word"
    ),
    # Edit comment contains the literal "[[WP:AES|←]]" marker.
    revision_oriented.revision.comment_matches(
        r"\[\[WP:AES\|←\]\]",
        name="enwiki.revision.comment.is_aes"
    ),
    # Templates matching "^cite" in this revision combined via `sub` with
    # those in the parent revision (presumably current minus parent).
    sub(
        wikitext_features.revision.template_names_matching(r"^cite"),
        wikitext_features.revision.parent.template_names_matching(r"^cite"),
        name="enwiki.revision.diff.cite_templates_added"
    )
]

# Badword match deltas between a revision and its parent: sum / increase /
# decrease, first as plain deltas and then as "prop" deltas.
_badwords_diff = english.badwords.revision.diff
badwords = [
    _badwords_diff.match_delta_sum,
    _badwords_diff.match_delta_increase,
    _badwords_diff.match_delta_decrease,
    _badwords_diff.match_prop_delta_sum,
    _badwords_diff.match_prop_delta_increase,
    _badwords_diff.match_prop_delta_decrease,
]

informals = [
    english.informals.revision.diff.match_delta_sum,
    english.informals.revision.diff.match_delta_increase,
github wikimedia / articlequality / articlequality / feature_lists / enwiki.py View on Github external
# Name patterns for shortened-footnote templates; joined with "|" into a
# single alternation when the feature is built.
SFN_TEMPLATES = [
    r"Shortened footnote template", r"sfn",
    r"Sfnp",
    r"Sfnm",
    r"Sfnmp"
]
shortened_footnote_templates = wikitext.revision.template_names_matching(
    "|".join(SFN_TEMPLATES),
    name="enwiki.revision.shortened_footnote_templates")
# Shortened footnotes are added to <ref> tags to form the reference total.
all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags
# `cite_templates` is defined earlier in the module (not visible here).
all_cite_templates = cite_templates + shortened_footnote_templates
# The denominator goes through max(..., 1), presumably so that a revision
# without references does not divide by zero.
proportion_of_templated_references = \
    all_cite_templates / max(all_ref_tags, 1)
# max(..., 0) presumably keeps the difference from going negative when
# there are more cite templates than reference tags.
non_templated_references = max(all_ref_tags - all_cite_templates, 0)
# All templates combined via `sub` with the cite-related ones.
non_cite_templates = sub(
    wikitext.revision.templates, all_cite_templates,
    name="enwiki.revision.non_cite_templates"
)

# Links
category_links = wikitext.revision.wikilink_titles_matching(
    r"Category\:", name="enwiki.revision.category_links")

# The namespace alternatives are grouped so the trailing colon applies to
# both.  Ungrouped, `File|Image\:` parses as `File` or `Image\:`, which lets
# the bare word "File" match without a namespace colon.
# NOTE(review): this tightens the values this feature produces; confirm
# whether models trained on the old values need retraining.
image_links = wikitext.revision.wikilink_titles_matching(
    r"(?:File|Image)\:", name="enwiki.revision.image_links")

# Templates used to embed images ("Wide image", "Tall image",
# "scalable image", "Panorama", "Panorama 2").
# NOTE(review): unless the matcher requires a full-string match, the
# "Panorama 2" alternative is already covered by "Panorama" -- confirm.
image_templates = wikitext.revision.template_names_matching(
    r"((Wide|Tall|scalable) image)|Panorama|Panorama 2",
    name='enwiki.revision.image_template')


def get_images(strs):
github wikimedia / articlequality / articlequality / feature_lists / ruwiki.py View on Github external
name="ruwiki.revision.cn_templates")

# Links
# The namespace alternatives are grouped so the trailing colon applies to
# every one of them.  Ungrouped, `A|B\:` parses as `A` or `B\:`, which lets
# the bare words "Category", "File" and "Image" match without a namespace
# colon.
# NOTE(review): this tightens the values these features produce; confirm
# whether models trained on the old values need retraining.
category_links = wikitext.revision.wikilink_titles_matching(
    r"(?:Category|Категория)\:", name="revision.category_links")
image_links = wikitext.revision.wikilink_titles_matching(
    r"(?:File|Image|Файл)\:", name="revision.image_links")

# Russian-Wikipedia-specific features.  Each raw feature is followed by the
# same feature divided by a length measure; the denominator goes through
# max(..., 1), presumably to avoid dividing by zero on empty revisions.
local_wiki = [
    russian.stemmed.revision.stem_chars,
    (russian.stemmed.revision.stem_chars /
     max(wikitext.revision.content_chars, 1)),
    image_links,
    image_links / max(wikitext.revision.content_chars, 1),
    category_links,
    category_links / max(wikitext.revision.content_chars, 1),
    russian.dictionary.revision.dict_words,
    # Dictionary words are scaled by word count rather than character count.
    russian.dictionary.revision.dict_words / max(wikitext.revision.words, 1),
    cn_templates,
    cn_templates / max(wikitext.revision.content_chars, 1),
]

# Full feature list: the local features plus the shared `wikipedia.article`
# list.
wp10 = local_wiki + wikipedia.article
github wikimedia / articlequality / articlequality / feature_lists / wikisource.py View on Github external
# Patterns for "weird" looking words, passed as a list to RegexMatches
# (defined outside this excerpt).
# TODO: This ends up being case insensitive even though that doesn't
#       make any sense.
weird_regexes = [
    # capital letters in the middle of a word
    r'\w[^\WA-Z\u00c0-\u00dd]*[A-Z\u00c0-\u00dd][^\WA-Z\u00c0-\u00dd]+',
    # non-text chars in the middle of a word
    r'\w+[^\w\s]\w+',
    # not actually french quotes e.g. "<<" and ">>" as opposed to « or »
    r'<<|>>'
]
weird_word_things = RegexMatches(
    "wikitext.revision.weird_word_things", weird_regexes)

# Tokens matching a single markup punctuation character: braces, brackets,
# pipes, semi-colons, backslashes, slashes and colons.
# NOTE(review): built with aggregators.len, so this is presumably a raw
# count rather than a proportion -- any normalization must happen elsewhere.
nonsense_markup = aggregators.len(
    wikitext.revision.datasources.tokens_matching(r"[\{\}\[\]\|\;\\\/\:]"),
    name="wikitext.revision.nonsense_markup")

# ... (in another page
# ...)
# TODO

# <big>,<small>,<center>,<div>,<span>,<b>,<i>,<poem>,<section>,''',''
# NOTE(review): the alternatives are not anchored, so depending on how the
# matcher applies the regex, short ones such as "b" and "i" may also match
# longer tag names -- confirm against tag_names_matching.
good_tags = wikitext.revision.tag_names_matching(
    r"big|small|center|div|span|b|i|poem|section",
    name="wikitext.revision.good_tags")
# Tokens that are bold (''') or italic ('') wiki markup.
# NOTE(review): the feature name below is spelled "wiktext", unlike the
# sibling "wikitext.*" names.  It is left untouched because the name
# identifies the feature -- confirm before renaming.
expected_markup = aggregators.len(
    wikitext.revision.datasources.tokens_matching(r"'''|''"),
    name="wiktext.revision.expected_markup")

page = [
    wikitext.revision.chars,
github wikimedia / articlequality / articlequality / feature_lists / ptwiki.py View on Github external
Portuguese Wikipedia
++++++++++++++++++++
"""
import re

from revscoring import Feature
from revscoring.datasources.meta import filters, mappers
from revscoring.features import wikitext
from revscoring.features.meta import aggregators
from revscoring.features.modifiers import log, max, sub
from revscoring.languages import portuguese

from . import wikipedia

# Templates
# Each feature below is built from a regex over template names and is given
# an explicit, "ptwiki."-prefixed feature name.
# NOTE(review): unless the matcher requires a full-string match, the
# "Infobox" alternative is already covered by "Info" -- confirm.
infobox_templates = wikitext.revision.template_names_matching(
    r"(Info|Infobox)", name="ptwiki.revision.infobox_templates")
# Name patterns for the "Carece de fontes" (citation needed) template
# family; joined with "|" into a single alternation below.
CN_TEMPLATES = [
    r"Carece[ _]de[ _]fontes",
    r"Carece[ _]de[ _]fontes2",
    r"Carece[ _]de[ _]fontes/bloco",
    r"Carece[ _]de[ _]fontes/bloco2"
]
cn_templates = wikitext.revision.template_names_matching(
    "|".join(CN_TEMPLATES), name="ptwiki.revision.cn_templates")
MAIN_TEMPLATES = [
    r"Artigo[ _]principal",
    r"Ver[ _]artigo[ _]principal",
    r"Principal",
    r"Ver[ _]também artigo[ _]principal",
    r"Main",
    r"Detalhes",
github wikimedia / articlequality / articlequality / feature_lists / ruwiki.py View on Github external
"""
Russian Wikipedia
+++++++++++++++++
"""

from revscoring.features import wikitext
from revscoring.features.modifiers import max
from revscoring.languages import russian

from . import wikipedia

# Feature built from template names matching "Нет АИ" (with a space or an
# underscore), the Russian "citation needed" marker.
cn_templates = wikitext.revision.template_names_matching(
    r"Нет[ _]АИ",
    name="ruwiki.revision.cn_templates")

# Links
# The namespace alternatives are grouped so the trailing colon applies to
# every one of them.  Ungrouped, `A|B\:` parses as `A` or `B\:`, which lets
# the bare words "Category", "File" and "Image" match without a namespace
# colon.
# NOTE(review): this tightens the values these features produce; confirm
# whether models trained on the old values need retraining.
category_links = wikitext.revision.wikilink_titles_matching(
    r"(?:Category|Категория)\:", name="revision.category_links")
image_links = wikitext.revision.wikilink_titles_matching(
    r"(?:File|Image|Файл)\:", name="revision.image_links")

local_wiki = [
    russian.stemmed.revision.stem_chars,
    (russian.stemmed.revision.stem_chars /
     max(wikitext.revision.content_chars, 1)),
    image_links,
    image_links / max(wikitext.revision.content_chars, 1),
    category_links,