How to use the feedparser._HTMLSanitizer.acceptable_elements attribute in feedparser

To help you get started, we’ve selected a few feedparser examples based on popular ways this attribute is used in public projects.

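Every example follows the same pattern: patch the sanitizer's tag whitelist before calling feedparser.parse(), so that elements feedparser would normally strip (iframe, object, embed, and so on) survive sanitization. Below is a minimal sketch of that pattern; it targets feedparser 5.x, where the sanitizer is exposed as feedparser._HTMLSanitizer (in feedparser 6.x the sanitizer moved to the feedparser.sanitizer module), and the feed URL is only a placeholder. Normalizing the whitelist to a set is a precaution, since the attribute has been a list in some releases and a set in others.

import feedparser

# Extend the sanitizer's tag whitelist before parsing. Converting to a set
# first keeps this working whether acceptable_elements is a list or a set
# in the installed release.
extra_tags = {'iframe', 'object', 'embed'}
feedparser._HTMLSanitizer.acceptable_elements = set(
    feedparser._HTMLSanitizer.acceptable_elements) | extra_tags

# Anything parsed after the monkeypatch keeps the whitelisted tags in
# entry content. Placeholder URL for illustration only.
feed_doc = feedparser.parse('https://example.com/feed.xml')

Bear in mind that widening the whitelist weakens the sanitizer: tags such as iframe, object, and script are stripped by default precisely because they can embed or execute third-party content.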

github mimecuvalo / helloworld / controllers / push.py
from base import BaseHandler

import feedparser
import tornado.web

from logic import content_remote

# monkeypatch
feedparser._HTMLSanitizer.acceptable_elements = \
    feedparser._HTMLSanitizer.acceptable_elements + ['iframe']

class PushHandler(BaseHandler):
  def get(self):
    self.write(self.get_argument('hub.challenge'))

  # tornado & python rock
  def check_xsrf_cookie(self):
    pass

  def post(self):
    user = self.models.users.get(username=self.breadcrumbs["profile"])[0]

    if not user:
      return

    feed_doc = feedparser.parse(self.request.body)
github appdotnet / pourover / buster / application / fetcher.py
import datetime
import hashlib
import logging


import feedparser
from google.appengine.ext import ndb
from google.appengine.api import urlfetch
from constants import VALID_STATUS
from utils import find_feed_url

logger = logging.getLogger(__name__)

# Monkeypatch feedparser
feedparser._HTMLSanitizer.acceptable_elements = set(list(feedparser._HTMLSanitizer.acceptable_elements) + ["object", "embed", "iframe", "param"])


class FetchException(Exception):
    pass


# Don't complain about this
ndb.add_flow_exception(FetchException)


@ndb.tasklet
def fetch_url(url, etag=None, user_agent=None):
    # Handle network issues here, handle other exceptions where this is called from

    # GAE's built-in urlfetch doesn't expose which HTTP status caused a request to follow
    # a redirect, which is important in this case because on a 301 we are supposed to update the
github mimecuvalo / helloworld / logic / content_remote.py
import random
import re
import urllib
import urllib2
import urlparse
from time import mktime

from BeautifulSoup import BeautifulSoup
import feedparser
import tornado.escape

from logic import users

# monkeypatch
feedparser._HTMLSanitizer.acceptable_elements = \
    feedparser._HTMLSanitizer.acceptable_elements + ['iframe']

def parse_feed(models, user, feed=None, parsed_feed=None, max_days_old=30,
    remote_comments=False):
  if remote_comments:
    bs_feed_doc = BeautifulSoup(feed)
    bs_entries = bs_feed_doc.findAll('entry')
  feed_doc = feedparser.parse(parsed_feed or feed)

  for index, entry in enumerate(feed_doc.entries):
    entry_id = entry.id if entry.has_key('id') else entry.link
    exists = models.content_remote.get(to_username=user.local_username,
        post_id=entry_id)[0]

    comments_count = 0
    comments_updated = None
    if 'links' in entry:
github stsquad / Gwibber / gwibber / microblog / rss.py
PROTOCOL_INFO = {
  "name": "RSS/Atom",
  "version": 0.1,
  
  "config": [
    "feed_url",
    "message_color",
    "receive_enabled",
  ],

  "features": [
    can.RECEIVE,
  ],
}

feedparser._HTMLSanitizer.acceptable_elements = []

def account_name(acct):
  if acct["feed_url"]:
    return urlparse.urlparse(acct["feed_url"])[1]

class Message:
  def __init__(self, client, data):
    self.client = client
    self.account = client.account
    self.protocol = client.account["protocol"]
    self.sender = data.get("author", "")
    self.sender_nick = self.sender
    self.sender_id = self.sender

    if hasattr(data, "summary"):
      self.text = data.summary
github feedhq / feedhq / feedhq / feeds / models.py
def error_display(self):
        if self.muted:
            key = self.error
        else:
            key = str(self.job_details['error'])
        return UniqueFeed.MUTE_DICT.get(key, _('Error'))


class EntryManager(models.Manager):
    def unread(self):
        return self.filter(read=False).count()


class BaseEntry(object):
    ELEMENTS = (
        feedparser._HTMLSanitizer.acceptable_elements |
        feedparser._HTMLSanitizer.mathml_elements |
        feedparser._HTMLSanitizer.svg_elements |
        set(['iframe', 'object', 'embed', 'script'])
    ) - set(['font'])
    ATTRIBUTES = (
        feedparser._HTMLSanitizer.acceptable_attributes |
        feedparser._HTMLSanitizer.mathml_attributes |
        feedparser._HTMLSanitizer.svg_attributes
    ) - set(['id', 'class'])
    CSS_PROPERTIES = feedparser._HTMLSanitizer.acceptable_css_properties

    @property
    def hex_pk(self):
        value = hex(struct.unpack("L", struct.pack("l", self.pk))[0])
        if value.endswith("L"):
            value = value[:-1]
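The feedhq example composes its whitelists with set arithmetic over the sanitizer's built-in collections (acceptable_elements, mathml_elements, svg_elements, and their attribute counterparts). A stripped-down sketch of the same idea follows; the added and removed tag names are purely illustrative, and it assumes a feedparser release where these attributes are sets.

import feedparser

# Build a custom tag whitelist from the sanitizer's built-in sets:
# union adds extra tags, difference drops unwanted ones.
ALLOWED_TAGS = (
    feedparser._HTMLSanitizer.acceptable_elements
    | feedparser._HTMLSanitizer.mathml_elements
    | feedparser._HTMLSanitizer.svg_elements
    | {'iframe', 'video'}  # illustrative additions
) - {'font'}  # illustrative removal

# Optionally install the composed whitelist as the parse-time default.
feedparser._HTMLSanitizer.acceptable_elements = ALLOWED_TAGS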