How to use the mwparserfromhell.nodes module in mwparserfromhell

To help you get started, we’ve selected a few mwparserfromhell examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github funtoo / ego / python / mediawiki / cli_parser.py View on Github external
accum_text.append(text_tokenize(node, prev_node))
		elif type(node) == mwparserfromhell.nodes.wikilink.Wikilink:
			if node.title.startswith("File:"):
				nodes = [ Color.RED, text_tokenize("Image - Click to view: "), Color.END, mwparserfromhell.nodes.external_link.ExternalLink("http://www.funtoo.org/%s" % node.title) ] + nodes
			elif node.text:
				nodes = [ Color.UNDERLINE, Color.CYAN ] + list(getMainNodes(str(node.text).strip())) + [ Color.END ] + nodes
			else:
				nodes = [ Color.UNDERLINE, Color.CYAN ] + list(getMainNodes(str(node.title).strip())) + [ Color.END ] + nodes
		elif type(node) == mwparserfromhell.nodes.external_link.ExternalLink:
			if node.title:
				tx = [ Color.UNDERLINE, text_tokenize(str(node.title).strip(), prev_node), Color.END, WikiTextSpace(),
				       WikiTextWord("("), Color.CYAN, WikiTextWord(node.url), Color.END, WikiTextWord(")") ]
			else:
				tx = [ Color.CYAN, text_tokenize(str(node.url), prev_node), Color.END]
			accum_text.append(tx)
		elif type(node) == mwparserfromhell.nodes.tag.Tag:
			if node.tag in ignore_tags:
				continue
			elif node.tag == 'tr':
				# table row start
				nodes = [ TableRowStart() ] + list(getMainNodes(str(node.contents).strip())) + [ TableRowEnd() ] + nodes
			elif node.tag == 'div':
				# just render the contents of the div
				nodes = list(getMainNodes(str(node.contents))) + nodes
			elif node.tag == 'nowiki':
				nodes = text_tokenize(node.contents, prev_node) + nodes
			elif node.tag == 'td':
				nodes = [ TableDataStart() ] + list(getMainNodes(str(node.contents).strip())) + [ TableDataEnd() ] + nodes
			elif node.tag == 'dt':
				accum_text.append([ OutputPadding(), Color.RED ])
			elif node.tag == 'dd':
				accum_text.append([ Color.END ])
github lahwaacz / wiki-scripts / ws / parser_helpers / wikicode.py View on Github external
def _get_text(index):
        # the first node has no previous node, especially not the last node
        if index < 0:
            return None, None
        try:
            node = parent.get(index)
            # don't EVER remove whitespace from non-Text nodes (it would
            # modify the objects by converting to str, making the operation
            # and replacing the object with str, but we keep references to
            # the old nodes)
            if not isinstance(node, mwparserfromhell.nodes.text.Text):
                return None, mwparserfromhell.nodes.text.Text
            return node, mwparserfromhell.nodes.text.Text
        except IndexError:
            return None, None
github wikimedia / revscoring / revscoring / features / wikitext / datasources / parsed.py View on Github external
name=self._name + ".headings"
        )
        """
        A list of :class:`mwparserfromhell.nodes.heading.Heading`'s
        """

        self.heading_titles = mappers.map(
            _extract_heading_title, self.headings,
            name=self._name + ".heading_titles"
        )
        """
        A list of heading titles
        """

        self.external_links = get_key(
            mwparserfromhell.nodes.ExternalLink, self.node_class_map,
            default=[],
            name=self._name + ".external_links"
        )
        """
        A list of :class:`mwparserfromhell.nodes.external_link.ExternalLink`'s
        """

        self.external_link_urls = mappers.map(
            _extract_external_link_url, self.external_links,
            name=self._name + ".external_link_url"
        )
        """
        A list of external link urls
        """

        self.wikilinks = get_key(
github amyxzhang / wikum / wikum / wikichatter / indentutils.py View on Github external
def has_continuation_indent(wikicode):
    """Report whether ``wikicode`` begins with an outdent/undent marker.

    Only the first node of ``wikicode.nodes`` is inspected:

    * a Template node matches if its lowercased source contains
      ``"outdent"``, ``"undent"`` or ``"od"``;
    * a Text node matches if its lowercased source contains ``"outdent"``
      or ``"undent"``.

    Returns ``False`` when there are no nodes or the first node is of any
    other type.
    """
    if not wikicode.nodes:
        return False

    start_node = wikicode.nodes[0]
    if isinstance(start_node, mwp.nodes.template.Template):
        # NOTE(review): "od" is a plain substring test, so it also matches
        # unrelated templates whose source merely contains "od" (e.g.
        # "{{code}}"). Kept as-is for compatibility -- confirm whether a
        # match on the template name was intended.
        markers = ("outdent", "undent", "od")
    elif isinstance(start_node, mwp.nodes.text.Text):
        # Text is referenced via its own module rather than through
        # ``nodes.template``, which only exposes it as an import side effect.
        markers = ("outdent", "undent")
    else:
        return False

    # Lowercase once instead of once per marker.
    lowered = str(start_node).lower()
    return any(marker in lowered for marker in markers)
github lahwaacz / wiki-scripts / url-replace.py View on Github external
def update_extlink(self, wikicode, extlink):
    """Apply the URL replacement rules to ``extlink`` inside ``wikicode``.

    HTML entities in the link's URL are normalized in place before the
    rules run. If ``self.extlink_url_replacements`` reports no replacement
    (returns a falsy value), the link is restored from a snapshot taken
    beforehand, so non-matching links end up unchanged. Returns ``None``.
    """
    # Disabled for now:
    #   self.strip_extra_brackets(wikicode, extlink)

    # Snapshot the link first so that links which do not match any rule can
    # be restored. A missing title is passed through as None rather than
    # being stringified.
    original_title = extlink.title
    if original_title is not None:
        original_title = str(original_title)
    extlink_copy = mwparserfromhell.nodes.ExternalLink(
        str(extlink.url),
        original_title,
        extlink.brackets,
        extlink.suppress_space,
    )

    # Replace HTML entities (such as "&#61;") in the URL with their
    # normalized form.
    # TODO: this may break templates if the decoded "&#61" stays in the replaced URL
    for entity in extlink.url.ifilter_html_entities(recursive=True):
        extlink.url.replace(entity, entity.normalize())

    # The old replacement rules are temporarily disabled:
    #   if self.extlink_replacements(wikicode, extlink):
    #       return
    # Stop as soon as one rule set has matched and replaced the link;
    # otherwise roll back the entity normalization done above.
    if not self.extlink_url_replacements(wikicode, extlink):
        wikicode.replace(extlink, extlink_copy)
github lahwaacz / wiki-scripts / link-checker.py View on Github external
def _get_text(index):
            try:
                node = parent.get(index)
                if not isinstance(node, mwparserfromhell.nodes.text.Text):
                    return None
                return node
            except IndexError:
                return None
github eggpi / citationhunt / snippet_parser / snippet_parser.py View on Github external
# Predicate selecting nodes to discard from the front of the node list: it is
# truthy for a node that compares equal to the empty string, is a Template
# instance, or whose e(node) value consists solely of newline characters.
# NOTE(review): `e` is defined outside this excerpt -- presumably it converts
# the node to text; confirm.
empty_or_template = (lambda node:
            node == '' or
            isinstance(node, mwparserfromhell.nodes.template.Template) or
            re.match('^\n+$', e(node)))
        # Skip leading nodes for as long as the predicate holds; everything
        # from the first non-matching node onwards is kept, including later
        # nodes that would match.
        nodes = list(itertools.dropwhile(empty_or_template, wikicode.nodes))