# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def clean_html(input):
    """Sanitize user-supplied HTML, keeping only a small tag whitelist.

    Disallowed tags are stripped but their text content is kept
    (``strip=True``); empty links are removed and bare URLs are linkified
    with ``rel=nofollow``.  See utils.test for examples.

    :param input: untrusted HTML string (name kept for caller
        compatibility even though it shadows the builtin).
    :return: sanitized HTML string.
    """
    ok_tags = ["a", "img", "strong", "b", "em", "i", "u", "ul", "li", "p",
               "br", "blockquote", "code"]
    ok_attributes = {"a": ["href", "rel"], "img": ["src", "alt", "title"]}
    # All other tags: replace with the content of the tag.
    # A bare link written as <http://...> would be parsed by bleach as a
    # tag and break the link, so pad it with spaces first: "< http:// >".
    # Raw string: the old '"\<...\>"' form relied on invalid escape
    # sequences ('\<' / '\>'), which are a SyntaxWarning on Python 3.12+.
    input = re.sub(r"<(http\S+?)>", r"< \1 >", input)
    cleaner = bleach.Cleaner(
        filters=[
            EmptyLinkFilter,
            partial(bleach.linkifier.LinkifyFilter, callbacks=[nofollow]),
        ],
        attributes=ok_attributes,
        tags=ok_tags,
        strip=True)
    return cleaner.clean(input)
"acronym": ["title"],
"table": ["width"],
"td": ["width", "align"],
"div": ["class"],
"p": ["class"],
"span": ["class"],
}
# Protocols bleach will allow in links.
ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel"]
# Reverse-sorting makes sure that shorter substring TLDs don't win against
# longer TLDs, e.g. matching '.com' before '.co'.
# NOTE(review): PublicSuffixList()._publicsuffix is a private attribute and
# may break on upgrade — confirm against the publicsuffixlist public API.
ALLOWED_TLDS = sorted(
    # Set comprehension directly: the old sorted(list(set(...))) wrapped the
    # set in a redundant list() (sorted() accepts any iterable).
    {suffix.rsplit(".")[-1] for suffix in PublicSuffixList()._publicsuffix},
    reverse=True,
)
TLD_REGEX = bleach.linkifier.build_url_re(tlds=ALLOWED_TLDS)
LINKIFIER = bleach.linkifier.Linker(url_re=TLD_REGEX, parse_email=True)
@register.filter
def rich_text(text: str):
    """Process markdown and cleans HTML in a text input."""
    # Falsy input (None, "") renders as the empty string.
    if not text:
        return ""
    # Pipeline: render markdown -> bleach.clean -> linkify bare URLs/emails.
    # NOTE(review): truncated fragment — the argument lists of
    # markdown.markdown(...), bleach.clean(...) and LINKIFIER.linkify(...)
    # are cut off below this point; the rest of this function was lost in
    # this paste.
    body_md = LINKIFIER.linkify(
        bleach.clean(
            markdown.markdown(
                str(text),
                extensions=[
                    "markdown.extensions.nl2br",
                    "markdown.extensions.sane_lists",
                    "markdown.extensions.tables",
                ],
"abbr": ["title"],
"acronym": ["title"],
"table": ["width"],
"td": ["width", "align"],
"div": ["class"],
"p": ["class"],
"span": ["class"],
}
# NOTE(review): duplicate of the constants defined earlier in this paste;
# at import time this second assignment is the one that wins.
ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel"]
# Reverse-sorting makes sure that shorter substring TLDs don't win against
# longer TLDs, e.g. matching '.com' before '.co'.
# NOTE(review): PublicSuffixList()._publicsuffix is a private attribute and
# may break on upgrade — confirm against the publicsuffixlist public API.
ALLOWED_TLDS = sorted(
    # Set comprehension directly: the old sorted(list(set(...))) wrapped the
    # set in a redundant list() (sorted() accepts any iterable).
    {suffix.rsplit(".")[-1] for suffix in PublicSuffixList()._publicsuffix},
    reverse=True,
)
TLD_REGEX = bleach.linkifier.build_url_re(tlds=ALLOWED_TLDS)
LINKIFIER = bleach.linkifier.Linker(url_re=TLD_REGEX, parse_email=True)
@register.filter
def rich_text(text: str):
    """Process markdown and cleans HTML in a text input."""
    # Falsy input (None, "") renders as the empty string.
    if not text:
        return ""
    # NOTE(review): duplicate of the rich_text filter above, truncated even
    # earlier — the extensions list is not closed and everything after
    # "markdown.extensions.tables" was lost in this paste.
    body_md = LINKIFIER.linkify(
        bleach.clean(
            markdown.markdown(
                str(text),
                extensions=[
                    "markdown.extensions.nl2br",
                    "markdown.extensions.sane_lists",
                    "markdown.extensions.tables",
def clean_localized_string(self):
    """Return the localized string sanitized to the allowed tags/attributes.

    Every link (text and markup) is normalized through the bounce-URL
    callback and marked rel=nofollow; anything outside the whitelist is
    escaped rather than stripped.
    """
    # All links (text and markup) are normalized while cleaning.
    normalize_links = partial(
        bleach.linkifier.LinkifyFilter,
        callbacks=[linkify_bounce_url_callback, bleach.callbacks.nofollow],
    )
    # Keep only the allowed tags and attributes, escape the rest.
    sanitizer = bleach.Cleaner(
        tags=self.allowed_tags,
        attributes=self.allowed_attributes,
        filters=[normalize_links],
    )
    return sanitizer.clean(str(self.localized_string))
def markdown(s: str) -> str:
    """Render *s* from markdown to sanitized HTML.

    Shortcodes are commented out first so the CommonMark renderer leaves
    them intact; the rendered (tainted) HTML is then cleaned down to the
    allowed tags/attributes/styles, with bare links auto-linkified.
    """
    hidden_shortcodes = shortcodes.comment_shortcodes(s)
    tainted_html = commonmark.commonmark(hidden_shortcodes)
    # LinkifyFilter lets the Cleaner parse bare links in the same pass
    # (see filters); HTML comments are preserved for the shortcodes.
    sanitizer = bleach.Cleaner(
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        styles=ALLOWED_STYLES,
        strip_comments=False,
        filters=[bleach.linkifier.LinkifyFilter],
    )
    return sanitizer.clean(tainted_html)
# NOTE(review): truncated fragment — the enclosing function's `def` line is
# missing.  Judging from the body it took (html, tags=None, attributes=None,
# styles=None) and an `if tags is None:` guard preceded the first line here
# — TODO confirm against the original source.
tags = ALLOWED_TAGS
if attributes is None:
    attributes = ALLOWED_ATTRIBUTES
if styles is None:
    styles = ALLOWED_STYLES
# Clean the output using Bleach
cleaner = bleach.sanitizer.Cleaner(
    tags=tags,
    attributes=attributes,
    styles=styles,
    filters=[
        # Bleach Linkify makes it easy to modify links, however, we will
        # not be using it to create additional links.
        functools.partial(
            bleach.linkifier.LinkifyFilter,
            # First callback drops links bleach would newly create
            # (returns None when `new` is truthy); existing links get
            # rel=nofollow.  Code inside <pre> is left alone.
            callbacks=[
                lambda attrs, new: attrs if not new else None,
                bleach.callbacks.nofollow,
            ],
            skip_tags=["pre"],
            parse_email=False,
        ),
    ],
)
# bleach raises ValueError on some malformed inputs; this caller prefers
# None over a crash.
try:
    cleaned = cleaner.clean(html)
    return cleaned
except ValueError:
    return None
# NOTE(review): truncated fragment of a django-markdownify-style helper —
# the function signature, the Django `settings` import, and the
# MARKDOWNIFY_WHITELIST_TAGS / _ATTRS lookups (which define whitelist_tags
# and whitelist_attrs used below) were lost in this paste, and the
# bleach.Cleaner(...) call at the bottom is cut off mid-arguments.
whitelist_styles = getattr(settings, 'MARKDOWNIFY_WHITELIST_STYLES', bleach.sanitizer.ALLOWED_STYLES)
whitelist_protocols = getattr(settings, 'MARKDOWNIFY_WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS)
# Markdown settings
strip = getattr(settings, 'MARKDOWNIFY_STRIP', True)
extensions = getattr(settings, 'MARKDOWNIFY_MARKDOWN_EXTENSIONS', [])
# Bleach Linkify
linkify = None
linkify_text = getattr(settings, 'MARKDOWNIFY_LINKIFY_TEXT', True)
if linkify_text:
    linkify_parse_email = getattr(settings, 'MARKDOWNIFY_LINKIFY_PARSE_EMAIL', False)
    linkify_callbacks = getattr(settings, 'MARKDOWNIFY_LINKIFY_CALLBACKS', None)
    linkify_skip_tags = getattr(settings, 'MARKDOWNIFY_LINKIFY_SKIP_TAGS', None)
    linkifyfilter = bleach.linkifier.LinkifyFilter
    linkify = [partial(linkifyfilter,
                       callbacks=linkify_callbacks,
                       skip_tags=linkify_skip_tags,
                       parse_email=linkify_parse_email
                       )]
# Convert markdown to html
html = markdown.markdown(text, extensions=extensions)
# Sanitize html if wanted
if getattr(settings, 'MARKDOWNIFY_BLEACH', True):
    cleaner = bleach.Cleaner(tags=whitelist_tags,
                             attributes=whitelist_attrs,
                             styles=whitelist_styles,