How to use the html5lib.constants module in html5lib

To help you get started, we’ve selected a few html5lib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github html5lib / html5lib-python / tests / test_parser.py View on Github external
if innerHTML:
            innerHTML = str(innerHTML, "utf8")

        if errors:
            errors = str(errors, "utf8")
            errors = errors.split("\n")

        expected = str(expected, "utf8")

        try:
            if innerHTML:
                document = p.parseFragment(io.BytesIO(input), innerHTML)
            else:
                try:
                    document = p.parse(io.BytesIO(input))
                except constants.DataLossWarning:
                    sys.stderr.write("Test input causes known dataloss, skipping")
                    return 
        except:
            errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"), 
                                  "\nExpected:", expected,
                                  "\nTraceback:", traceback.format_exc()])
            self.assertTrue(False, errorMsg)
        
        output = convertTreeDump(p.tree.testSerializer(document))
        output = attrlist.sub(sortattrs, output)
        
        expected = convertExpected(expected)
        expected = attrlist.sub(sortattrs, expected)
        errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"), 
                              "\nExpected:", expected,
                              "\nReceived:", output])
github html5lib / html5lib-python / tests / test_parser.py View on Github external
errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"), 
                                  "\nExpected:", expected,
                                  "\nTraceback:", traceback.format_exc()])
            self.assertTrue(False, errorMsg)
        
        output = convertTreeDump(p.tree.testSerializer(document))
        output = attrlist.sub(sortattrs, output)
        
        expected = convertExpected(expected)
        expected = attrlist.sub(sortattrs, expected)
        errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"), 
                              "\nExpected:", expected,
                              "\nReceived:", output])
        self.assertEquals(expected, output, errorMsg)
        errStr = ["Line: %i Col: %i %s %s"%(line, col, 
                                         constants.E[errorcode], datavars) for
                  ((line,col), errorcode, datavars) in p.errors]
        errorMsg2 = "\n".join(["\n\nInput:", str(input, "utf8"),
                               "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
                               "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
        if checkParseErrors:
            self.assertEquals(len(p.errors), len(errors), errorMsg2)
github html5lib / html5lib-python / tests / test_serializer.py View on Github external
import os
import unittest
from support import simplejson, html5lib_test_files

from html5lib import html5parser, serializer, constants
from html5lib.treewalkers._base import TreeWalker

default_namespace = constants.namespaces["html"]

class JsonWalker(TreeWalker):
    def __iter__(self):
        for token in self.tree:
            type = token[0]
            if type == "StartTag":
                if len(token) == 4:
                    namespace, name, attrib = token[1:]
                else:
                    namespace = default_namespace
                    name, attrib = token[1:]
                yield self.startTag(namespace, name, attrib)
            elif type == "EndTag":
                if len(token) == 3:
                    namespace, name = token[1:]
                else:
github html5lib / html5lib-python / html5lib / treewalkers / __init__.py View on Github external
if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
                if token["namespace"] in constants.prefixes:
                    ns = constants.prefixes[token["namespace"]]
                else:
                    ns = token["namespace"]
                name = "%s %s" % (ns, token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering)
            attrs = token["data"]
            for (namespace, localname), value in sorted(attrs.items()):
                if namespace:
                    if namespace in constants.prefixes:
                        ns = constants.prefixes[namespace]
                    else:
                        ns = namespace
                    name = "%s %s" % (ns, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing
            if type == "EmptyTag":
                indent -= 2

        elif type == "EndTag":
            indent -= 2

        elif type == "Comment":
            output.append("%s" % (" " * indent, token["data"]))
github palexu / send2kindle / calibre / ebooks / oeb / parse_utils.py View on Github external
if ns is not None:
                elem.tag = '{%s}%s'%(ns, tag)

        for b in tuple(elem.attrib):
            idx = b.find('U0003A')
            if idx > -1:
                prefix, tag = b[:idx], b[idx+6:]
                ns = elem.nsmap.get(prefix, None)
                if ns is None:
                    ns = non_html5_namespaces.get(prefix, None)
                if ns is not None:
                    elem.attrib['{%s}%s'%(ns, tag)] = elem.attrib.pop(b)

        seen_namespaces |= set(elem.nsmap.itervalues())

    nsmap = dict(html5lib.constants.namespaces)
    nsmap[None] = nsmap.pop('html')
    non_html5_namespaces.update(nsmap)
    nsmap = non_html5_namespaces

    data = clone_element(data, nsmap=nsmap, in_context=False)

    # Remove unused namespace declarations
    fnsmap = {k:v for k,v in nsmap.iteritems() if v in seen_namespaces and v !=
            XMLNS_NS}
    return clone_element(data, nsmap=fnsmap, in_context=False)
github mdn / kuma / kuma / wiki / management / commands / generate_sphinx_template.py View on Github external
def handle(self, *args, **options):

        # Not ideal, but we need to temporarily remove inline elements as a
        # void/ignored element
        # TO DO:  Can this clone code be shortened?
        new_void_set = set()
        for item in html5lib_constants.voidElements:
            new_void_set.add(item)
        new_void_set.remove('link')
        new_void_set.remove('img')
        html5lib_constants.voidElements = frozenset(new_void_set)

        # Create a mock request for the sake of rendering the template
        request = RequestFactory().get('/')
        request.LANGUAGE_CODE = settings.LANGUAGE_CODE
        request.META['SERVER_NAME'] = 'developer.mozilla.org'

        # Load the page with sphinx template
        content = render(request, 'wiki/sphinx.html',
                         {'is_sphinx': True, 'gettext': ugettext}).content

        # Use a filter to make links absolute
        tool = parse(content, is_full_document=True)
        content = tool.absolutizeAddresses(
            base_url=settings.PRODUCTION_URL,
            tag_attributes={
                'a': 'href',
github NIT-Warangal / DispensaryMS / venv / lib / python2.7 / site-packages / html5lib / treebuilders / dom.py View on Github external
else:
                        rv.append("|%s"%(' '*indent, element.name))
                else:
                    rv.append("|%s"%(' '*indent,))
            elif element.nodeType == Node.DOCUMENT_NODE:
                rv.append("#document")
            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                rv.append("#document-fragment")
            elif element.nodeType == Node.COMMENT_NODE:
                rv.append("|%s"%(' '*indent, element.nodeValue))
            elif element.nodeType == Node.TEXT_NODE:
                rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
            else:
                if (hasattr(element, "namespaceURI") and
                    element.namespaceURI != None):
                    name = "%s %s"%(constants.prefixes[element.namespaceURI],
                                    element.nodeName)
                else:
                    name = element.nodeName
                rv.append("|%s<%s>"%(' '*indent, name))
                if element.hasAttributes():
                    i = 0
                    attr = element.attributes.item(i)
                    while attr:
                        name = attr.nodeName
                        value = attr.value
                        ns = attr.namespaceURI
                        if ns:
                            name = "%s %s"%(constants.prefixes[ns], attr.localName)
                        else:
                            name = attr.nodeName
                        i += 1
github Aalto-LeTech / a-plus / lib / html5lib / treebuilders / etree_lxml.py View on Github external
elif isinstance(element, str):
                #Text in a fragment
                rv.append("|%s\"%s\""%(' '*indent, element))
            else:
                #Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent+2)
        elif type(element.tag) == type(etree.Comment):
            rv.append("|%s"%(' '*indent, element.text))
        else:
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>"%(' '*indent, prefix,
                                        filter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>"%(' '*indent,
                                     filter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                for name, value in element.attrib.items():
                    nsmatch = etree_builders.tag_regexp.match(name)
                    if nsmatch:
                        ns = nsmatch.group(1)
                        name = nsmatch.group(2)
                        prefix = constants.prefixes[ns]
                        rv.append('|%s%s %s="%s"' % (' '*(indent+2), 
                                                  prefix,
                                                  filter.fromXmlName(name),
github maximilianh / pubMunch / lib / html5lib / treebuilders / etree_lxml.py View on Github external
elif isinstance(element, basestring):
                #Text in a fragment
                rv.append("|%s\"%s\""%(' '*indent, element))
            else:
                #Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent+2)
        elif type(element.tag) == type(etree.Comment):
            rv.append("|%s"%(' '*indent, element.text))
        else:
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>"%(' '*indent, prefix,
                                        filter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>"%(' '*indent,
                                     filter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.iteritems():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = filter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s"%(prefix, name)
                    else:
github maximilianh / pubMunch / lib / html5lib / treebuilders / etree.py View on Github external
if nsmatch is None:
                    name = element.tag
                else:
                    ns, name = nsmatch.groups()
                    prefix = constants.prefixes[ns]
                    name = "%s %s"%(prefix, name)
                rv.append("|%s<%s>"%(' '*indent, name))

                if hasattr(element, "attrib"):
                    attributes = []
                    for name, value in element.attrib.iteritems():
                        nsmatch = tag_regexp.match(name)
                        if nsmatch is not None:
                            ns, name = nsmatch.groups()
                            prefix = constants.prefixes[ns]
                            attr_string = "%s %s"%(prefix, name)
                        else:
                            attr_string = name
                        attributes.append((attr_string, value))

                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
                if element.text:
                    rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if element.tail:
                rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
        serializeElement(element, 0)