How to use the bleach.html5lib_shim.BleachHTMLSerializer class in bleach

To help you get started, we’ve selected a few bleach examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozilla / bleach / tests / test_html5lib_shim.py View on Github external
def test_serializer(data, expected):
    """Check that ``data`` serializes to ``expected``.

    The parser, tree walker, and serializer are configured the same way
    clean() sets them up; ``data`` is parsed as a fragment, walked, and
    rendered back to a string.
    """
    parser_options = {
        'tags': None,
        'strip': True,
        'consume_entities': False,
        'namespaceHTMLElements': False,
    }
    serializer_options = {
        'quote_attr_values': 'always',
        'omit_optional_tags': False,
        'escape_lt_in_attrs': True,
        'resolve_entities': False,
        'sanitize': False,
        'alphabetical_attributes': False,
    }

    # Assemble the pipeline pieces
    fragment_parser = html5lib_shim.BleachHTMLParser(**parser_options)
    tree_walker = html5lib_shim.getTreeWalker('etree')
    html_serializer = html5lib_shim.BleachHTMLSerializer(**serializer_options)

    # Parse -> walk -> serialize, then compare against the expectation
    tree = fragment_parser.parseFragment(data)
    rendered = html_serializer.render(tree_walker(tree))

    assert rendered == expected
github mozilla / bleach / tests / test_html5lib_shim.py View on Github external
def test_bleach_html_parser(parser_args, data, expected):
    """Check that ``data`` parses and serializes to ``expected``.

    ``parser_args`` is layered on top of the default parser arguments
    (``tags=None``, ``strip=True``, ``consume_entities=True``) before the
    parser is built. The walker and serializer are configured the same
    way clean() sets them up.
    """
    # Start from the defaults and let the test case override them
    parser_options = dict(tags=None, strip=True, consume_entities=True)
    parser_options.update(parser_args)

    serializer_options = {
        'quote_attr_values': 'always',
        'omit_optional_tags': False,
        'escape_lt_in_attrs': True,
        'resolve_entities': False,
        'sanitize': False,
        'alphabetical_attributes': False,
    }

    # Assemble the pipeline pieces
    fragment_parser = html5lib_shim.BleachHTMLParser(**parser_options)
    tree_walker = html5lib_shim.getTreeWalker('etree')
    html_serializer = html5lib_shim.BleachHTMLSerializer(**serializer_options)

    # Parse -> walk -> serialize, then compare against the expectation
    tree = fragment_parser.parseFragment(data)
    rendered = html_serializer.render(tree_walker(tree))

    assert rendered == expected
github mozilla / bleach / bleach / linkifier.py View on Github external
self.callbacks = callbacks
        self.skip_tags = skip_tags
        self.parse_email = parse_email
        self.url_re = url_re
        self.email_re = email_re

        # Create a parser/tokenizer that allows all HTML tags and escapes
        # anything not in that list.
        self.parser = html5lib_shim.BleachHTMLParser(
            tags=recognized_tags,
            strip=False,
            consume_entities=True,
            namespaceHTMLElements=False,
        )
        self.walker = html5lib_shim.getTreeWalker('etree')
        self.serializer = html5lib_shim.BleachHTMLSerializer(
            quote_attr_values='always',
            omit_optional_tags=False,

            # linkify does not sanitize
            sanitize=False,

            # linkify alphabetizes
            alphabetical_attributes=False,
        )
github mozilla / bleach / bleach / sanitizer.py View on Github external
self.tags = tags
        self.attributes = attributes
        self.styles = styles
        self.protocols = protocols
        self.strip = strip
        self.strip_comments = strip_comments
        self.filters = filters or []

        self.parser = html5lib_shim.BleachHTMLParser(
            tags=self.tags,
            strip=self.strip,
            consume_entities=False,
            namespaceHTMLElements=False
        )
        self.walker = html5lib_shim.getTreeWalker('etree')
        self.serializer = html5lib_shim.BleachHTMLSerializer(
            quote_attr_values='always',
            omit_optional_tags=False,
            escape_lt_in_attrs=True,

            # We want to leave entities as they are without escaping or
            # resolving or expanding
            resolve_entities=False,

            # Bleach has its own sanitizer, so don't use the html5lib one
            sanitize=False,

            # Bleach sanitizer alphabetizes already, so don't use the html5lib one
            alphabetical_attributes=False,
        )
github mozilla / bleach / bleach / html5lib_shim.py View on Github external
def serialize(self, treewalker, encoding=None):
        """Wrap HTMLSerializer.serialize and convert & to &amp; in attribute values

        Note that this converts & to &amp; in attribute values where the & isn't
        already part of an unambiguous character entity.

        """
        in_tag = False
        after_equals = False

        for stoken in super(BleachHTMLSerializer, self).serialize(treewalker, encoding):
            if in_tag:
                if stoken == '>':
                    in_tag = False

                elif after_equals:
                    if stoken != '"':
                        for part in self.escape_base_amp(stoken):
                            yield part

                        after_equals = False
                        continue

                elif stoken == '=':
                    after_equals = True

                yield stoken