How to use the feedparser._sanitizeHTML function in feedparser

To help you get started, we’ve selected a few feedparser examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pygame / pygameweb / pygameweb / sanitize.py View on Github external
def sanitize_html(html, force_https=True):
    """Return a sanitized copy of ``html``.

    The markup is run through ``feedparser._sanitizeHTML`` (as utf-8
    ``text/html``) to try and avoid basic html insertion attacks.  Unless
    ``force_https`` is false, every ``src="http://`` in the cleaned markup
    is then rewritten to ``src="https://``.

        >>> sanitize_html("<p>hello</p>")
        '<p>hello</p>'
        >>> sanitize_html("")
        ''
    """
    sanitized = feedparser._sanitizeHTML(html, "utf-8", "text/html")
    if not force_https:
        return sanitized
    # Upgrade embedded resource URLs so they are requested over https.
    return sanitized.replace('src="http://', 'src="https://')
github openSUSE / planet.opensuse.org / rawdoglib / rawdog.py View on Github external
def sanitise_html(html, baseurl, inline, config, type):
	"""Attempt to turn arbitrary feed-provided HTML into something
	suitable for safe inclusion into the rawdog output. The inline
	parameter says whether to expect a fragment of inline text, or a
	sequence of block-level elements."""
	if html is None:
		return None

	html = encode_references(html)

	# sgmllib handles "<br/>" as a SHORTTAG; this workaround is from
	# feedparser.
	html = re.sub(r'(\S)/&gt;', r'\1 /&gt;', html)

	html = feedparser._resolveRelativeURIs(html, baseurl, "UTF-8", type)
	html = feedparser._sanitizeHTML(html, "UTF-8", type)

	if not inline and config["blocklevelhtml"]:
		# If we're after some block-level HTML and the HTML doesn't
		# start with a block-level element, then insert a <p> tag
		# before it. This still fails when the HTML contains text, then
		# a block-level element, then more text, but it's better than
		# nothing.
		if block_level_re.match(html) is None:
			html = "</p><p>" + html

	if config["tidyhtml"]:
		import mx.Tidy
		args = { "wrap": 0, "numeric_entities": 1 }
		plugins.call_hook("mxtidy_args", config, args, baseurl, inline)
		output = mx.Tidy.tidy(html, None, None,
		                      **args)[2]</p>
github wfrog / wfrog / wflogger / input / atom.py View on Github external
def do_run(self):
        import feedparser
    
        # Tweak feedparser to accept XML as content
        feedparser._FeedParserMixin.unknown_starttag = feedparser_unknown_starttag
        feedparser._FeedParserMixin.unknown_endtag = feedparser_unknown_endtag
        feedparser._sanitizeHTML = lambda source, encoding:  source
    
        self.logger.debug('Starting')
        
        # Does not accept events pre-dating the startup
        self.last_event = time.gmtime()
        
        if self.url == None:
            raise Exception('Attribute url must be set')
        
        while True:
            self.logger.debug("Reading feed")            
            feed = feedparser.parse(self.url)
                        
            last_update = self.last_event
            
            new_events=0
github ianlewis / django-lifestream / lifestream / feeds.py View on Github external
None, feedparser._FeedParserMixin))

if hasattr(feedparser, '_StrictFeedParser'):
    feedparser._StrictFeedParser.mapContentType = (
        types.MethodType(
            _mapContentType, 
            None, feedparser._StrictFeedParser))

feedparser._LooseFeedParser.mapContentType = (
        types.MethodType(
            _mapContentType, 
            None, feedparser._LooseFeedParser))

# Change out feedparser's html sanitizer for our own based
# on BeautifulSoup and our own tag/attribute stripper.
feedparser._sanitizeHTML = sanitize_html

def get_mod_class(plugin):
    """
    Split a dotted path at its last dot into a (module, class) tuple.

    'lifestream.plugins.FeedPlugin' becomes
    ('lifestream.plugins', 'FeedPlugin').  A path without any dot is
    returned unchanged as the first item, paired with an empty string.
    """
    module_path, separator, class_name = plugin.rpartition('.')
    if not separator:
        # No dot at all: the whole string is the module part.
        return plugin, ''
    return module_path, class_name

try:
    from feedcache import Cache
    from util import CacheStorage
    # TODO: Use a cache storage object.
github amitu / dutils / dutils / utils.py View on Github external
subject, sender=settings.DEFAULT_FROM_EMAIL, recip="", context=None, 
    html_template="", text_template="", sender_name="",
    html_content="", text_content="", recip_list=None, sender_formatted=""
):
    from stripogram import html2text
    from feedparser import _sanitizeHTML

    if not context: context = {}
    if html_template:
        html = render(context, html_template)
    else: html = html_content
    if text_template:
        text = render(context, text_template)
    else: text = text_content
    if not text:
        text = html2text(_sanitizeHTML(html,charset))        

    if not recip_list: recip_list = []
    if recip: recip_list.append(recip)

    try:
        if getattr(settings, "EMAIL_USE_SSL", False):
            server = SMTP_SSL(settings.EMAIL_HOST, settings.EMAIL_PORT)
        else:
            server = SMTP(settings.EMAIL_HOST, settings.EMAIL_PORT)
        if settings.EMAIL_USE_TLS:
            server.ehlo()
            server.starttls()
            server.ehlo()
        if settings.EMAIL_HOST_USER and settings.EMAIL_HOST_PASSWORD:
            server.login(
                settings.EMAIL_HOST_USER, settings.EMAIL_HOST_PASSWORD
github mimecuvalo / helloworld / logic / content_remote.py View on Github external
def sanitize(value):
  """Return `value` after passing it through feedparser's HTML sanitizer.

  The input is handed to ``feedparser._sanitizeHTML`` with the encoding
  'UTF-8' and the content type 'text/html'.
  """
  cleaned = feedparser._sanitizeHTML(value, 'UTF-8', 'text/html')
  return cleaned
github csytan / webnodes / apps / forums / templatetags / tag_extras.py View on Github external
def markdownify(value):
    import feedparser
    import markdown2
    value = urlfinder.sub(r'&lt;\1&gt;', value)
    value = urlfinder2.sub(r' &lt;\1&gt;', value)
    html = markdown2.markdown(value)
    html = feedparser._sanitizeHTML(html, 'utf-8')
    html = html.replace('