How to use the bleach.html5lib_shim.BleachHTMLParser class in bleach

To help you get started, we’ve selected a few bleach examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozilla / bleach / tests / test_html5lib_shim.py View on Github external
def test_bleach_html_parser(parser_args, data, expected):
    """Round-trip ``data`` through the Bleach parser and serializer.

    :arg parser_args: overrides applied on top of the default parser
        arguments (``tags=None``, ``strip=True``, ``consume_entities=True``)
    :arg data: the HTML fragment handed to ``parseFragment``
    :arg expected: the serialized text the round trip must produce

    """
    # Start from the defaults and let the test case override any of them
    parser_kwargs = dict(tags=None, strip=True, consume_entities=True)
    parser_kwargs.update(parser_args)

    # Same parser / walker / serializer trio that clean() sets up
    parser = html5lib_shim.BleachHTMLParser(**parser_kwargs)
    walker = html5lib_shim.getTreeWalker('etree')
    serializer_options = {
        'quote_attr_values': 'always',
        'omit_optional_tags': False,
        'escape_lt_in_attrs': True,
        'resolve_entities': False,
        'sanitize': False,
        'alphabetical_attributes': False,
    }
    serializer = html5lib_shim.BleachHTMLSerializer(**serializer_options)

    # Parse the fragment, walk the resulting tree, and serialize it back
    tree = parser.parseFragment(data)
    output = serializer.render(walker(tree))

    assert output == expected
github mozilla / bleach / tests / test_html5lib_shim.py View on Github external
def test_serializer(data, expected):
    """Set up the parser, tree walker, and serializer for a serializer test.

    The parser is built with ``tags=None``, ``strip=True``,
    ``consume_entities=False`` and ``namespaceHTMLElements=False``.

    NOTE(review): this excerpt stops right after the setup; the steps that
    parse ``data``, serialize it, and compare against ``expected`` are not
    visible here.

    """
    # Build a parser, walker, and serializer just like we do in clean()
    parser = html5lib_shim.BleachHTMLParser(
        tags=None,
        strip=True,
        consume_entities=False,
        namespaceHTMLElements=False
    )
    walker = html5lib_shim.getTreeWalker('etree')
    serializer = html5lib_shim.BleachHTMLSerializer(
        quote_attr_values='always',
        omit_optional_tags=False,
        escape_lt_in_attrs=True,
        resolve_entities=False,
        sanitize=False,
        alphabetical_attributes=False,
    )

    # Parse, walk, and then serialize the output
github mozilla / bleach / bleach / linkifier.py View on Github external
:arg list-of-strings recognized_tags: the list of tags that linkify knows about;
            everything else gets escaped

        :returns: linkified text as unicode

        """
        self.callbacks = callbacks
        self.skip_tags = skip_tags
        self.parse_email = parse_email
        self.url_re = url_re
        self.email_re = email_re

        # Create a parser/tokenizer that allows all HTML tags and escapes
        # anything not in that list.
        self.parser = html5lib_shim.BleachHTMLParser(
            tags=recognized_tags,
            strip=False,
            consume_entities=True,
            namespaceHTMLElements=False,
        )
        self.walker = html5lib_shim.getTreeWalker('etree')
        self.serializer = html5lib_shim.BleachHTMLSerializer(
            quote_attr_values='always',
            omit_optional_tags=False,

            # linkify does not sanitize
            sanitize=False,

            # linkify alphabetizes
            alphabetical_attributes=False,
        )
github mozilla / bleach / bleach / sanitizer.py View on Github external
.. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output is secure.

        """
        self.tags = tags
        self.attributes = attributes
        self.styles = styles
        self.protocols = protocols
        self.strip = strip
        self.strip_comments = strip_comments
        self.filters = filters or []

        self.parser = html5lib_shim.BleachHTMLParser(
            tags=self.tags,
            strip=self.strip,
            consume_entities=False,
            namespaceHTMLElements=False
        )
        self.walker = html5lib_shim.getTreeWalker('etree')
        self.serializer = html5lib_shim.BleachHTMLSerializer(
            quote_attr_values='always',
            omit_optional_tags=False,
            escape_lt_in_attrs=True,

            # We want to leave entities as they are without escaping or
            # resolving or expanding
            resolve_entities=False,

            # Bleach has its own sanitizer, so don't use the html5lib one
github mozilla / bleach / bleach / html5lib_shim.py View on Github external
def __init__(self, tags, strip, consume_entities, **kwargs):
        """Record the Bleach-specific options, then initialize the base parser.

        :arg tags: allowed tag names; each name is lowercased before being
            stored. Anything not listed is stripped or escaped. ``None``
            means tags are not checked at all.
        :arg strip: ``True`` to strip disallowed tags, ``False`` to escape
            them; has no effect when ``tags`` is ``None``
        :arg consume_entities: whether entities are consumed while tokenizing
            (the default behavior) or left as is (behavior added by
            BleachHTMLTokenizer)
        :arg kwargs: passed through unchanged to the parent class initializer

        """
        if tags is None:
            # No allow-list: keep None so tag checking stays disabled
            normalized_tags = None
        else:
            normalized_tags = [name.lower() for name in tags]

        self.tags = normalized_tags
        self.strip = strip
        self.consume_entities = consume_entities

        super(BleachHTMLParser, self).__init__(**kwargs)