How to use the bleach.linkifier module in bleach

To help you get started, we’ve selected a few bleach examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github MTG / freesound / utils / text.py View on Github external
def clean_html(input):
    """Sanitize user-supplied HTML down to a small whitelist of tags.

    See utils.test for examples.

    Args:
        input: The raw text/HTML entered by a user. The name shadows the
            builtin ``input``; it is kept so existing keyword callers keep
            working.

    Returns:
        Whatever ``bleach.Cleaner.clean`` returns for the pre-processed text.
    """
    # Tags and attributes that survive cleaning. All other tags are replaced
    # with the content of the tag (``strip=True`` below).
    ok_tags = [u"a", u"img", u"strong", u"b", u"em", u"i", u"u", u"ul", u"li", u"p", u"br", u"blockquote", u"code"]
    ok_attributes = {u"a": [u"href", u"rel"], u"img": [u"src", u"alt", u"title"]}

    # If input contains a link in the format <http://...>, convert it to
    # < http://... >. Otherwise the library recognizes it as a tag and breaks
    # the link.
    # The pattern is a raw string: "\<", "\>" and "\S" are invalid escape
    # sequences in a plain string literal and trigger a warning on modern
    # Python. "<" and ">" need no escaping in a regular expression.
    input = re.sub(r"<(http\S+?)>", r"< \1 >", input)

    cleaner = bleach.Cleaner(
        tags=ok_tags,
        attributes=ok_attributes,
        filters=[
            # NOTE(review): EmptyLinkFilter is defined elsewhere; presumably it
            # deals with anchors that have no content -- confirm.
            EmptyLinkFilter,
            # Linkify the text, running the `nofollow` callback on each link.
            partial(bleach.linkifier.LinkifyFilter, callbacks=[nofollow]),
        ],
        strip=True,
    )
    return cleaner.clean(input)
github pretalx / pretalx / src / pretalx / common / templatetags / rich_text.py View on Github external
"acronym": ["title"],
    "table": ["width"],
    "td": ["width", "align"],
    "div": ["class"],
    "p": ["class"],
    "span": ["class"],
}

ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel"]

# Reverse-sorting this list makes sure that shorter substring TLDs don't win
# against longer TLDs, e.g. '.com' is tried before '.co'.
# Only the last dot-separated label of each public suffix is kept, and the set
# comprehension removes duplicates before sorting.
ALLOWED_TLDS = sorted(
    {suffix.rpartition(".")[2] for suffix in PublicSuffixList()._publicsuffix},
    reverse=True,
)
# URL pattern restricted to the TLDs above, and a linker that uses it and also
# linkifies e-mail addresses.
TLD_REGEX = bleach.linkifier.build_url_re(tlds=ALLOWED_TLDS)
LINKIFIER = bleach.linkifier.Linker(url_re=TLD_REGEX, parse_email=True)


@register.filter
def rich_text(text: str):
    """Process markdown and cleans HTML in a text input."""
    if not text:
        return ""
    body_md = LINKIFIER.linkify(
        bleach.clean(
            markdown.markdown(
                str(text),
                extensions=[
                    "markdown.extensions.nl2br",
                    "markdown.extensions.sane_lists",
                    "markdown.extensions.tables",
                ],
github pretalx / pretalx / src / pretalx / common / templatetags / rich_text.py View on Github external
"abbr": ["title"],
    "acronym": ["title"],
    "table": ["width"],
    "td": ["width", "align"],
    "div": ["class"],
    "p": ["class"],
    "span": ["class"],
}

ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel"]

# Reverse-sorting this list makes sure that shorter substring TLDs don't win
# against longer TLDs, e.g. '.com' is tried before '.co'.
# Only the last dot-separated label of each public suffix is kept, and the set
# comprehension removes duplicates before sorting.
ALLOWED_TLDS = sorted(
    {suffix.rpartition(".")[2] for suffix in PublicSuffixList()._publicsuffix},
    reverse=True,
)
# URL pattern restricted to the TLDs above, and a linker that uses it and also
# linkifies e-mail addresses.
TLD_REGEX = bleach.linkifier.build_url_re(tlds=ALLOWED_TLDS)
LINKIFIER = bleach.linkifier.Linker(url_re=TLD_REGEX, parse_email=True)


@register.filter
def rich_text(text: str):
    """Process markdown and cleans HTML in a text input."""
    if not text:
        return ""
    body_md = LINKIFIER.linkify(
        bleach.clean(
            markdown.markdown(
                str(text),
                extensions=[
                    "markdown.extensions.nl2br",
                    "markdown.extensions.sane_lists",
                    "markdown.extensions.tables",
github mozilla / addons-server / src / olympia / translations / models.py View on Github external
def clean_localized_string(self):
        """Return ``self.localized_string`` after cleaning it with bleach.

        The string is passed through a ``bleach.Cleaner`` configured with
        ``self.allowed_tags`` and ``self.allowed_attributes`` plus a linkify
        filter.
        """
        # Every link, whether plain text or markup, goes through these
        # callbacks so that all links end up normalized.
        link_callbacks = [linkify_bounce_url_callback, bleach.callbacks.nofollow]
        link_filter = partial(bleach.linkifier.LinkifyFilter, callbacks=link_callbacks)

        # Only the allowed tags and attributes are kept; the rest is escaped.
        sanitizer = bleach.Cleaner(
            tags=self.allowed_tags,
            attributes=self.allowed_attributes,
            filters=[link_filter],
        )
        text = str(self.localized_string)
        return sanitizer.clean(text)
github armadillica / pillar / pillar / markdown.py View on Github external
def markdown(s: str) -> str:
    """Render *s* with commonmark and return the bleach-cleaned HTML.

    The text first goes through ``shortcodes.comment_shortcodes``, then
    ``commonmark.commonmark``, and the resulting HTML is cleaned against the
    module's ALLOWED_TAGS / ALLOWED_ATTRIBUTES / ALLOWED_STYLES.
    """
    rendered = commonmark.commonmark(shortcodes.comment_shortcodes(s))

    # The LinkifyFilter is what lets the cleaner parse bare links.
    # NOTE(review): strip_comments=False presumably preserves the shortcodes
    # commented out above -- confirm against the shortcodes module.
    return bleach.Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        styles=ALLOWED_STYLES,
        strip_comments=False,
        filters=[bleach.linkifier.LinkifyFilter],
    ).clean(rendered)
github pypa / readme_renderer / readme_renderer / clean.py View on Github external
tags = ALLOWED_TAGS
    if attributes is None:
        attributes = ALLOWED_ATTRIBUTES
    if styles is None:
        styles = ALLOWED_STYLES

    # Clean the output using Bleach
    cleaner = bleach.sanitizer.Cleaner(
        tags=tags,
        attributes=attributes,
        styles=styles,
        filters=[
            # Bleach Linkify makes it easy to modify links, however, we will
            # not be using it to create additional links.
            functools.partial(
                bleach.linkifier.LinkifyFilter,
                callbacks=[
                    lambda attrs, new: attrs if not new else None,
                    bleach.callbacks.nofollow,
                ],
                skip_tags=["pre"],
                parse_email=False,
            ),
        ],
    )
    try:
        cleaned = cleaner.clean(html)
        return cleaned
    except ValueError:
        return None
github erwinmatijsen / django-markdownify / markdownify / templatetags / markdownify.py View on Github external
whitelist_styles = getattr(settings, 'MARKDOWNIFY_WHITELIST_STYLES', bleach.sanitizer.ALLOWED_STYLES)
    whitelist_protocols = getattr(settings, 'MARKDOWNIFY_WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS)

    # Markdown settings
    strip = getattr(settings, 'MARKDOWNIFY_STRIP', True)
    extensions = getattr(settings, 'MARKDOWNIFY_MARKDOWN_EXTENSIONS', [])

    # Bleach Linkify
    linkify = None
    linkify_text = getattr(settings, 'MARKDOWNIFY_LINKIFY_TEXT', True)

    if linkify_text:
        linkify_parse_email = getattr(settings, 'MARKDOWNIFY_LINKIFY_PARSE_EMAIL', False)
        linkify_callbacks = getattr(settings, 'MARKDOWNIFY_LINKIFY_CALLBACKS', None)
        linkify_skip_tags = getattr(settings, 'MARKDOWNIFY_LINKIFY_SKIP_TAGS', None)
        linkifyfilter = bleach.linkifier.LinkifyFilter

        linkify = [partial(linkifyfilter,
                callbacks=linkify_callbacks,
                skip_tags=linkify_skip_tags,
                parse_email=linkify_parse_email
                )]

    # Convert markdown to html
    html = markdown.markdown(text, extensions=extensions)

    # Sanitize html if wanted
    if getattr(settings, 'MARKDOWNIFY_BLEACH', True):

        cleaner = bleach.Cleaner(tags=whitelist_tags,
                                 attributes=whitelist_attrs,
                                 styles=whitelist_styles,