How to use the feedparser.USER_AGENT attribute in feedparser

To help you get started, we’ve selected a few feedparser examples based on popular ways it is used in public projects.
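feedparser.USER_AGENT is a module-level string, not a function: it holds the default HTTP User-Agent header that feedparser sends whenever it fetches a feed over the network, and assigning to it changes that default for every later feedparser.parse() call in the process. As a minimal sketch (the agent string and URL below are placeholders):

import feedparser

# Inspect the library default, something like
# "feedparser/6.0.11 +https://github.com/kurtmckee/feedparser/".
print(feedparser.USER_AGENT)

# Override it once at startup; every later parse() call uses the new value.
feedparser.USER_AGENT = "my-rss-client/1.0 +https://example.com/my-rss-client"
feed = feedparser.parse("https://example.com/feed.xml")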


github quodlibet/quodlibet: quodlibet/quodlibet/browsers/audiofeeds.py
def _check_feed(self):
        """Validate stream a bit - failing fast where possible.

           Constructs an equivalent(ish) HEAD request,
           without re-writing feedparser completely.
           (it never times out if reading from a stream - see #2257)"""
        req = feedparser._build_urllib2_request(
            self.uri, feedparser.USER_AGENT, None, None, None, None, {})
        req.method = "HEAD"
        opener = build_opener(feedparser._FeedURLHandler())
        try:
            result = opener.open(req)
            ct_hdr = result.headers.get('Content-Type', "Unknown type")
            content_type = ct_hdr.split(';')[0]
            try:
                status = result.status
            except AttributeError:
                print_w("Missing status code for feed %s" % self.uri)
            else:
                print_d("Pre-check: %s returned %s with content type '%s'" %
                        (self.uri, status, content_type))
                if content_type not in feedparser.ACCEPT_HEADER:
                    print_w("Unusable content: %s. Perhaps %s is not a feed?" %
                            (content_type, self.uri))
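Note that _build_urllib2_request() and _FeedURLHandler are private feedparser 5.x internals that feedparser 6 no longer provides. A rough equivalent of the same HEAD pre-check, sketched with only the standard library (the feed URL and timeout are assumptions):

import urllib.request

import feedparser

uri = "https://example.com/feed.xml"  # placeholder feed URL
req = urllib.request.Request(uri, method="HEAD")
req.add_header("User-Agent", feedparser.USER_AGENT)
with urllib.request.urlopen(req, timeout=10) as result:
    ct_hdr = result.headers.get("Content-Type", "Unknown type")
    content_type = ct_hdr.split(";")[0]
    print("Pre-check: %s returned %s with content type '%s'"
          % (uri, result.status, content_type))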
github mmmonk/crap: home_stuff/rss_news.py
s.sendmail(me, [me], msg.encode(rssfeed.encoding,'ignore'))
		s.quit()
		AddToDataFile(rssentry.link)
		DbPrint('message sent ok')
	  except:
		DbPrint('message send failed')

	  time.sleep(1)

#####################################################################

if __name__ == '__main__':

  random.seed()

  feedparser.USER_AGENT = HTTPHEADERS['User-Agent']
  if LoadFeeds(FEEDSFILE):
	LoadDataFile(DATAFILE)


	try:
	  for feed in RSSFEEDS:
		sleeptime=random.randint(2,15)
		DbPrint('sleeping for '+str(sleeptime)+' seconds')
		time.sleep(sleeptime)
		DbPrint('====================== start new feed ======================')
		ReadFeed(feed)

	except KeyboardInterrupt:
	  DbPrint('interrupted, exiting')
   
	try:
github mmmonk/crap: home_stuff/rss_torrents.py
rssentry.link = re.sub(RSSDOWNLOAD[host][0],RSSDOWNLOAD[host][1],rssentry.link)
		DbPrint('  new link '+rssentry.link)
	  
	  GetFile(url,rssentry.link)
	else:
	  DbPrint('not interested in this torrent')



#####################################################################

if __name__ == '__main__':

  random.seed()

  feedparser.USER_AGENT = HTTPHEADERS['User-Agent']
  LoadDataFile(DATAFILE)

  try:
	for feed in RSSFEEDS:
	  DbPrint('====================== start new feed ======================')
	  ReadFeed(feed)
	  sleeptime=random.randint(2,15)
	  DbPrint('sleeping for '+str(sleeptime)+' seconds')
	  time.sleep(sleeptime)

  except KeyboardInterrupt:
	DbPrint('interrupted, exiting')
 
  try:
	tempfile = open(DATATMPFILE,'r')
	tempfile.close()
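Both scripts above set feedparser.USER_AGENT once at startup so that every fetch identifies itself the same way. If individual feeds need a different identity, feedparser.parse() also accepts a documented agent argument that overrides the global for a single call, roughly:

import feedparser

HTTPHEADERS = {"User-Agent": "my-rss-client/1.0"}  # stand-in for the dict above

# Per-call override; the module-level feedparser.USER_AGENT is left untouched.
feed = feedparser.parse("https://example.com/feed.xml",
                        agent=HTTPHEADERS["User-Agent"])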
github MLB-LED-Scoreboard/mlb-led-scoreboard: data/headlines.py
def update(self, force=False):
    if force or self.__should_update():
      debug.log("Headlines should update!")
      self.starttime = time.time()
      feeds = []
      debug.log("{} feeds to update...".format(len(self.feed_urls)))
      feedparser.USER_AGENT = "mlb-led-scoreboard/3.0 +https://github.com/MLB-LED-Scoreboard/mlb-led-scoreboard"
      if len(self.feed_urls) > 0:
        debug.log("Feed URLs found...")
        for idx, url in enumerate(self.feed_urls):
          if idx < HEADLINE_MAX_FEEDS: # Only parse MAX teams to prevent potential hangs
            debug.log("Fetching {}".format(url))
            f = feedparser.parse(url)
            try:
              title = f.feed.title.encode("ascii", "ignore")
              debug.log("Fetched feed '{}' with {} entries.".format(title, len(f.entries)))
              feeds.append(f)
            except AttributeError:
              debug.warning("There was a problem fetching {}".format(url))
        self.feed_data = feeds
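The "name/version +project-URL" agent convention used here lets feed publishers identify and contact the client. Since this code re-polls on a timer, it could also lean on feedparser's documented conditional-GET support so unchanged feeds come back as an empty 304 response; a minimal sketch (the URL is a placeholder):

import feedparser

url = "https://example.com/feed.xml"  # placeholder
first = feedparser.parse(url)

# Re-poll later, passing back ETag/Last-Modified; servers that support
# conditional GET answer 304 with no entries when nothing changed.
later = feedparser.parse(url,
                         etag=first.get("etag"),
                         modified=first.get("modified"))
if later.get("status") == 304:
    print("feed unchanged")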
github Rhilip/PT-help: modules/ptboard/script/backtracking.py
"search_type": "id",  # 回爬方式
        # "enable": True,    # 如果启用该方法,请取消该注释
    },
]

# -*- End of configuration

# Construct the database connection pool
db = pymysql.connect(host=db_host, port=db_port, user=db_user, password=db_password, db=db_db,
                     autocommit=True, charset='utf8')
cursor = db.cursor()

headers = {
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
}
feedparser.USER_AGENT = 'FlexGet/2.10.61 (www.flexget.com)'


def cookies_raw2jar(raw: str) -> dict:
    """
    Arrange cookies from a raw string using SimpleCookie
    """
    cookie = SimpleCookie(raw)
    sort_cookies = {}
    for key, morsel in cookie.items():
        sort_cookies[key] = morsel.value
    return sort_cookies


def has_title(tag):
    ret = False
    if tag.name == "a" and tag.has_attr('title'):
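This script impersonates FlexGet's User-Agent, presumably because the private trackers it scrapes only serve known clients. When extra headers such as cookies must accompany the fetch, feedparser.parse() also takes a request_headers mapping; a sketch with placeholder values:

import feedparser

feed = feedparser.parse(
    "https://example.com/torrentrss.php",          # placeholder URL
    agent="FlexGet/2.10.61 (www.flexget.com)",     # the impersonated UA above
    request_headers={"Cookie": "uid=0; pass=x"},   # placeholder cookie header
)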
github Dieterbe/rss2email: rss2email.py
# A pox on SunOS file locking methods
	if (sys.platform.find('sunos') == -1):
		unix = 1
except:
	pass

import socket; socket_errors = []
for e in ['error', 'gaierror']:
	if hasattr(socket, e): socket_errors.append(getattr(socket, e))

#DEPRECATED import mimify
#DEPRECATED from StringIO import StringIO as SIO
#DEPRECATED mimify.CHARSET = 'utf-8'

import feedparser
feedparser.USER_AGENT = "rss2email/"+__version__+ " +http://www.allthingsrss.com/rss2email/"

import html2text as h2t

h2t.UNICODE_SNOB = UNICODE_SNOB
h2t.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH
h2t.BODY_WIDTH = BODY_WIDTH
html2text = h2t.html2text

from types import *

### Utility Functions ###

import threading
class TimeoutError(Exception): pass

class InputError(Exception): pass
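rss2email interpolates its own __version__ into the agent string, which keeps server logs meaningful across releases. In a packaged Python 3 project the same effect can come from package metadata rather than a hand-maintained constant; a sketch, where the distribution name is hypothetical:

from importlib.metadata import version

import feedparser

PKG = "my-rss-client"  # hypothetical installed distribution name
feedparser.USER_AGENT = "%s/%s +https://example.com/%s" % (PKG, version(PKG), PKG)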
github ggaughan/pipe2py: pipe2py/modules/pipefetch.py
# pipefetch.py
#

try:
    import speedparser as feedparser
except ImportError:
    import feedparser

    feedparser.USER_AGENT = (
        "pipe2py (feedparser/%s) +https://github.com/ggaughan/pipe2py" %
        feedparser.__version__
    )

from urllib2 import urlopen
from pipe2py.lib.dotdict import DotDict
from pipe2py import util


def pipe_fetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetches and parses one or more feeds to yield the feed entries.

    Keyword arguments:
    context -- pipeline context
    _INPUT -- not used
    conf:
github ggaughan/pipe2py: pipe2py/modules/pipefetchsitefeed.py
# pipefetchsitefeed.py
#

try:
    import speedparser as feedparser
except ImportError:
    import feedparser

    feedparser.USER_AGENT = (
        "pipe2py (feedparser/%s) +https://github.com/ggaughan/pipe2py" %
        feedparser.__version__
    )

from urllib2 import urlopen
from pipe2py.lib import autorss
from pipe2py import util
from pipe2py.lib.dotdict import DotDict


def pipe_fetchsitefeed(context=None, _INPUT=None, conf=None, **kwargs):
    """This source fetches and parses the first feed found on one or more sites
       to yield the feed entries.

    Keyword arguments:
    context -- pipeline context
github fazalmajid/temboz: tembozapp/update.py
except ImportError:
  import Queue as queue
try:
  import urllib.parse as urlparse
except ImportError:
  import urlparse
try:
  import html.parser as HTMLParser
except ImportError:
  import HTMLParser

from . import param, normalize, util, transform, filters, dbop
import imp

#socket.setdefaulttimeout(10)
feedparser.USER_AGENT = param.user_agent

class ParseError(Exception):
  pass
class AutodiscoveryParseError(Exception):
  pass
class FeedAlreadyExists(Exception):
  pass
class UnknownError(Exception):
  pass

ratings = [
  ('all',      'all',           'All articles',     'item_rating is not null'),
  ('unread',   'unread',        'Unread only',       'item_rating = 0'),
  ('down',     'uninteresting', 'Uninteresting only','item_rating = -1'),
  ('up',       'interesting',   'Interesting only',  'item_rating > 0'),
  ('filtered', 'filtered',      'Filtered only',     'item_rating = -2')
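The commented-out socket.setdefaulttimeout(10) above points at a real gap: feedparser.parse() has no timeout argument, so a stalled server can hang a fetch indefinitely (the same problem quodlibet's pre-check works around, see #2257 earlier). A common, if blunt, workaround is a process-wide socket timeout set before any parsing; placeholders as before:

import socket

import feedparser

# Applies to every new socket in the process, not just feedparser's.
socket.setdefaulttimeout(10)

feedparser.USER_AGENT = "my-rss-client/1.0"
feed = feedparser.parse("https://example.com/feed.xml")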
github elgehelge/stocknews: stocknews.py
"""
        # Setup URL parameters
        args = {}
        args['q'] = short_name
        args['output'] = 'rss'
        args['startdate'] = start_date
        args['enddate'] = end_date
        args['start'] = 0 # pagination (start from item number <'start'> ...)
        args['num'] = 100 # pagination (... and show the next <'num'> items.)

        # Initialize variables
        reached_end_of_feed = False
        all_feed_items = []

        # Fetch feed items until end of feed
        feedparser.USER_AGENT = self.USER_AGENT
        while not reached_end_of_feed:
            feed_url = 'https://www.google.com/finance/company_news?' \
                       + urllib.urlencode(args)
            feed = feedparser.parse(feed_url)
            all_feed_items += feed['items']
            reached_end_of_feed = len(feed['items']) != args['num']
            # Prepare for requesting next page
            args['start'] += args['num']

        return all_feed_items