How to use the feedparser._HTMLSanitizer class in feedparser

To help you get started, we’ve selected a few feedparser examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github fazalmajid / temboz / feedparser.py View on Github external
def _sanitizeHTML(htmlSource):
    """Run ``htmlSource`` through ``_HTMLSanitizer`` and collect its output.

    A fresh ``_HTMLSanitizer`` is created, fed the markup, and its
    ``output()`` is stored in ``data``.

    NOTE(review): this excerpt stops inside the commented-out ``_mxtidy``
    code below, so the remainder of the function (including whatever it
    returns) is not visible here.
    """
    p = _HTMLSanitizer()
    p.feed(htmlSource)
    data = p.output()
    # Disabled: optional post-processing of ``data`` with ``_mxtidy.tidy``.
#    if _mxtidy:
#        nerrors, nwarnings, data, errordata = _mxtidy.tidy(data, output_xhtml=1, numeric_entities=1, wrap=0)
#        if data.count(''):
#                data = data.split('>', 1)[1]
#        if data.count('
github apache / allura / Allura / allura / lib / markdown_extensions.py View on Github external
def run(self, text):
        """Sanitize *text* with feedparser's HTML sanitizer.

        The text is encoded to UTF-8, fed through
        ``feedparser._HTMLSanitizer``, and the sanitizer's output is decoded
        from UTF-8 into a ``unicode`` string, which is returned.
        """
        encoding = 'utf-8'
        try:
            sanitizer = feedparser._HTMLSanitizer(encoding)
        except TypeError:
            # Pre-released feedparser versions (from SOG) reject the
            # one-argument form, so retry with an empty second argument.
            sanitizer = feedparser._HTMLSanitizer(encoding, '')
        encoded_text = text.encode(encoding)
        sanitizer.feed(encoded_text)
        return unicode(sanitizer.output(), encoding)
github ranveeraggarwal / iitb-blog-aggregator / rawdog / rawdoglib / rawdog.py View on Github external
if html is None:
		return None

	html = encode_references(html)
	type = "text/html"

	# sgmllib handles "<br>/" as a SHORTTAG; this workaround from
	# feedparser.
	html = re.sub(r'(\S)/&gt;', r'\1 /&gt;', html)

	# sgmllib is fragile with broken processing instructions (e.g.
	# ""); just remove them all.
	html = re.sub(r']*&gt;', '', html)

	html = feedparser._resolveRelativeURIs(html, baseurl, "UTF-8", type)
	p = feedparser._HTMLSanitizer("UTF-8", type)
	p.feed(html)
	html = p.output()

	if not inline and config["blocklevelhtml"]:
		# If we're after some block-level HTML and the HTML doesn't
		# start with a block-level element, then insert a <p> tag
		# before it. This still fails when the HTML contains text, then
		# a block-level element, then more text, but it's better than
		# nothing.
		if block_level_re.match(html) is None:
			html = "</p><p>" + html

	if config["tidyhtml"]:
		args = {
			"numeric_entities": 1,
			"output_html": 1,</p>
github stsquad / Gwibber / gwibber / microblog / greader.py View on Github external
"version": 0.1,
  
  "config": [
    "private:password",
    "username",
    "message_color",
    "receive_enabled",
  ],

  "features": [
    can.RECEIVE,
    can.READ,
  ],
}

# Replace the sanitizer's class-level ``acceptable_elements`` with an empty
# list; because this is set on the class, it applies to every
# ``_HTMLSanitizer`` created afterwards in this process.
# NOTE(review): presumably this makes the sanitizer drop every HTML element
# it encounters -- confirm against feedparser's _HTMLSanitizer.
feedparser._HTMLSanitizer.acceptable_elements = []

class Message:
  """A single feed entry wrapped with the client/account it came from."""

  def __init__(self, client, data):
    """Populate the message fields from ``client`` and the entry ``data``.

    ``data`` must offer a dict-style ``get``.  When it also exposes a
    ``source`` attribute, that source's title is used for both ``source``
    and ``sender``; otherwise ``source`` is empty and ``sender`` falls back
    to the entry's author.
    """
    account = client.account
    author = data.get("author", "")

    self.client = client
    self.account = account
    self.protocol = account["protocol"]

    if hasattr(data, "source"):
      # Entries carrying a source are attributed to that source's title.
      source_title = data.source.get("title", "")
      self.source = source_title
      self.sender = source_title
    else:
      self.source = ""
      self.sender = author

    self.gr_id = data.get("id", "")

    # The nick is always the raw author, even when sender is the source.
    self.sender_nick = author
github apache / allura / Allura / allura / lib / markdown_extensions.py View on Github external
def run(self, text):
        """Sanitize *text* with feedparser's HTML sanitizer.

        The text is encoded to UTF-8, fed through
        ``feedparser._HTMLSanitizer``, and the sanitizer's output is decoded
        from UTF-8 into a ``unicode`` string, which is returned.
        """
        encoding = 'utf-8'
        try:
            sanitizer = feedparser._HTMLSanitizer(encoding)
        except TypeError:
            # Pre-released feedparser versions (from SOG) reject the
            # one-argument form, so retry with an empty second argument.
            sanitizer = feedparser._HTMLSanitizer(encoding, '')
        encoded_text = text.encode(encoding)
        sanitizer.feed(encoded_text)
        return unicode(sanitizer.output(), encoding)