How to use the bleach.html5lib_shim module in bleach

To help you get started, we’ve selected a few bleach examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozilla / bleach / bleach / sanitizer.py View on Github external
return attr in attr_val

            return False

        return _attr_filter

    if isinstance(attributes, list):
        def _attr_filter(tag, attr, value):
            return attr in attributes

        return _attr_filter

    raise ValueError('attributes needs to be a callable, a list or a dict')


class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
    """html5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    """
    def __init__(self, source, attributes=ALLOWED_ATTRIBUTES,
                 strip_disallowed_elements=False, strip_html_comments=True,
                 **kwargs):
        """Creates a BleachSanitizerFilter instance

        :arg Treewalker source: stream

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
github mozilla / bleach / bleach / sanitizer.py View on Github external
new_val = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(val))
                    new_val = new_val.strip()
                    if not new_val:
                        continue

                    else:
                        # Replace the val with the unescaped version because
                        # it's an IRI
                        val = new_val

                # Drop href and xlink:href attr for svg elements with non-local IRIs
                if (None, token['name']) in self.svg_allow_local_href:
                    if namespaced_name in [
                            (None, 'href'), (html5lib_shim.namespaces['xlink'], 'href')
                    ]:
                        if re.search(r'^\s*[^#\s]', val):
                            continue

                # If it's a style attribute, sanitize it
                if namespaced_name == (None, 'style'):
                    val = self.sanitize_css(val)

                # At this point, we want to keep the attribute, so add it in
                attrs[namespaced_name] = val

            token['data'] = alphabetize_attributes(attrs)

        return token
github mozilla / bleach / bleach / linkifier.py View on Github external
if not text:
            return ''

        dom = self.parser.parseFragment(text)
        filtered = LinkifyFilter(
            source=self.walker(dom),
            callbacks=self.callbacks,
            skip_tags=self.skip_tags,
            parse_email=self.parse_email,
            url_re=self.url_re,
            email_re=self.email_re,
        )
        return self.serializer.render(filtered)


class LinkifyFilter(html5lib_shim.Filter):
    """html5lib filter that linkifies text

    This will do the following:

    * convert email addresses into links
    * convert urls into links
    * edit existing links by running them through callbacks--the default is to
      add a ``rel="nofollow"``

    This filter can be used anywhere html5lib filters can be used.

    """
    def __init__(self, source, callbacks=None, skip_tags=None, parse_email=False,
                 url_re=URL_RE, email_re=EMAIL_RE):
        """Creates a LinkifyFilter instance
github mozilla / bleach / bleach / linkifier.py View on Github external
def __init__(self, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False,
                 url_re=URL_RE, email_re=EMAIL_RE, recognized_tags=html5lib_shim.HTML_TAGS):
        """Creates a Linker instance

        :arg list callbacks: list of callbacks to run when adjusting tag attributes;
            defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

        :arg list skip_tags: list of tags that you don't want to linkify the
            contents of; for example, you could set this to ``['pre']`` to skip
            linkifying contents of ``pre`` tags

        :arg bool parse_email: whether or not to linkify email addresses

        :arg re url_re: url matching regex

        :arg re email_re: email matching regex

        :arg list-of-strings recognized_tags: the list of tags that linkify knows about;
github mozilla / bleach / bleach / linkifier.py View on Github external
def build_url_re(tlds=TLDS, protocols=html5lib_shim.allowed_protocols):
    """Builds the url regex used by linkifier

   If you want a different set of tlds or allowed protocols, pass those in
   and stomp on the existing ``url_re``::

       from bleach import linkifier

       my_url_re = linkifier.build_url_re(my_tlds_list, my_protocols)

       linker = LinkifyFilter(url_re=my_url_re)

    """
    return re.compile(
        r"""\(*  # Match any opening parentheses.
        \b(?