def _check_feed(self):
    """Validate stream a bit - failing fast where possible.
    Constructs an equivalent(ish) HEAD request,
    without re-writing feedparser completely.
    (it never times out if reading from a stream - see #2257)"""
    req = feedparser._build_urllib2_request(
        self.uri, feedparser.USER_AGENT, None, None, None, None, {})
    req.method = "HEAD"
    opener = build_opener(feedparser._FeedURLHandler())
    try:
        result = opener.open(req)
        ct_hdr = result.headers.get('Content-Type', "Unknown type")
        content_type = ct_hdr.split(';')[0]
        try:
            status = result.status
        except AttributeError:
            print_w("Missing status code for feed %s" % self.uri)
        else:
            print_d("Pre-check: %s returned %s with content type '%s'" %
                    (self.uri, status, content_type))
            if content_type not in feedparser.ACCEPT_HEADER:
                print_w("Unusable content: %s. Perhaps %s is not a feed?" %
                        (content_type, self.uri))
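
# A separate, minimal sketch of the same pre-check idea using only the public
# urllib API (the function name, timeout and error handling here are
# assumptions, not part of the snippet above): issue a cheap HEAD request with
# a timeout before handing the URL to feedparser, so a dead or non-feed URL
# fails fast instead of hanging.
import urllib.request
import feedparser

def quick_feed_precheck(uri, timeout=10):
    """Return the Content-Type of `uri`, or None if the HEAD request fails."""
    req = urllib.request.Request(
        uri, method="HEAD", headers={"User-Agent": feedparser.USER_AGENT})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as result:
            return result.headers.get("Content-Type", "Unknown type").split(";")[0]
    except OSError:
        return None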
        s.sendmail(me, [me], msg.encode(rssfeed.encoding, 'ignore'))
        s.quit()
        AddToDataFile(rssentry.link)
        DbPrint('message sent ok')
    except Exception:
        DbPrint('message send failed')
        time.sleep(1)
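
# A self-contained sketch of the mail step above using the stdlib email API
# (the address, subject, body and SMTP host are placeholders; the original
# script's message construction is not shown here).
import smtplib
from email.message import EmailMessage

def mail_entry(me, subject, body, smtp_host='localhost'):
    msg = EmailMessage()
    msg['From'] = me
    msg['To'] = me
    msg['Subject'] = subject
    msg.set_content(body)
    with smtplib.SMTP(smtp_host) as s:
        s.send_message(msg)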
#####################################################################
if __name__ == '__main__':
    random.seed()
    feedparser.USER_AGENT = HTTPHEADERS['User-Agent']
    if LoadFeeds(FEEDSFILE):
        LoadDataFile(DATAFILE)
        try:
            for feed in RSSFEEDS:
                sleeptime = random.randint(2, 15)
                DbPrint('sleeping for ' + str(sleeptime) + ' seconds')
                time.sleep(sleeptime)
                DbPrint('====================== start new feed ======================')
                ReadFeed(feed)
        except KeyboardInterrupt:
            DbPrint('interrupted, exiting')
        try:
            rssentry.link = re.sub(RSSDOWNLOAD[host][0], RSSDOWNLOAD[host][1],
                                   rssentry.link)
            DbPrint(' new link ' + rssentry.link)
            GetFile(url, rssentry.link)
    else:
        DbPrint('not interested in this torrent')
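
# A hypothetical illustration of the RSSDOWNLOAD rewrite table used above
# (the host key and regex pair are made up): each entry maps a tracker host to
# a (pattern, replacement) pair that turns an entry's details link into a
# direct download link via re.sub back-references.
import re

RSSDOWNLOAD_EXAMPLE = {
    'tracker.example.org': (r'details\.php\?id=(\d+)', r'download.php?id=\1'),
}

link = 'https://tracker.example.org/details.php?id=12345'
pattern, replacement = RSSDOWNLOAD_EXAMPLE['tracker.example.org']
print(re.sub(pattern, replacement, link))
# -> https://tracker.example.org/download.php?id=12345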
#####################################################################
if __name__ == '__main__':
    random.seed()
    feedparser.USER_AGENT = HTTPHEADERS['User-Agent']
    LoadDataFile(DATAFILE)
    try:
        for feed in RSSFEEDS:
            DbPrint('====================== start new feed ======================')
            ReadFeed(feed)
            sleeptime = random.randint(2, 15)
            DbPrint('sleeping for ' + str(sleeptime) + ' seconds')
            time.sleep(sleeptime)
    except KeyboardInterrupt:
        DbPrint('interrupted, exiting')
    try:
        tempfile = open(DATATMPFILE, 'r')
        tempfile.close()
    def update(self, force=False):
        if force or self.__should_update():
            debug.log("Headlines should update!")
            self.starttime = time.time()
            feeds = []
            debug.log("{} feeds to update...".format(len(self.feed_urls)))
            feedparser.USER_AGENT = "mlb-led-scoreboard/3.0 +https://github.com/MLB-LED-Scoreboard/mlb-led-scoreboard"
            if len(self.feed_urls) > 0:
                debug.log("Feed URLs found...")
                for idx, url in enumerate(self.feed_urls):
                    if idx < HEADLINE_MAX_FEEDS:  # Only parse MAX teams to prevent potential hangs
                        debug.log("Fetching {}".format(url))
                        f = feedparser.parse(url)
                        try:
                            title = f.feed.title.encode("ascii", "ignore")
                            debug.log("Fetched feed '{}' with {} entries.".format(title, len(f.entries)))
                            feeds.append(f)
                        except AttributeError:
                            debug.warning("There was a problem fetching {}".format(url))
                self.feed_data = feeds
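
# A small standalone variant of the fetch loop above (the feed URLs and agent
# string are placeholders): feedparser's FeedParserDict behaves like a dict,
# so a missing feed title can also be handled with .get() instead of catching
# AttributeError.
import feedparser

feedparser.USER_AGENT = "my-scoreboard/1.0 +https://example.com"

def fetch_headlines(feed_urls, max_feeds=8):
    feeds = []
    for url in feed_urls[:max_feeds]:   # cap parsed feeds to avoid long hangs
        f = feedparser.parse(url)
        title = f.feed.get("title", "<no title>")
        print("Fetched feed '%s' with %d entries" % (title, len(f.entries)))
        feeds.append(f)
    return feeds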
"search_type": "id", # 回爬方式
# "enable": True, # 如果启用该方法,请取消该注释
},
]
# -*- 结束配置
# 构造数据库连接池
db = pymysql.connect(host=db_host, port=db_port, user=db_user, password=db_password, db=db_db,
autocommit=True, charset='utf8')
cursor = db.cursor()
headers = {
'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
}
feedparser.USER_AGENT = 'FlexGet/2.10.61 (www.flexget.com)'
def cookies_raw2jar(raw: str) -> dict:
"""
Arrange Cookies from raw using SimpleCookies
"""
cookie = SimpleCookie(raw)
sort_cookies = {}
for key, morsel in cookie.items():
sort_cookies[key] = morsel.value
return sort_cookies
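
# A brief usage sketch for cookies_raw2jar (the cookie values and URL are
# placeholders, and `requests` is assumed to be available): turn a
# browser-copied Cookie header into a dict that can be sent with a request.
import requests

raw = "uid=12345; pass=0123456789abcdef"
jar = cookies_raw2jar(raw)   # -> {"uid": "12345", "pass": "0123456789abcdef"}
resp = requests.get("https://example.com/rss.php", headers=headers, cookies=jar)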
def has_title(tag):
    ret = False
    if tag.name == "a" and tag.has_attr('title'):
    # A pox on SunOS file locking methods
    if (sys.platform.find('sunos') == -1):
        unix = 1
except:
    pass

import socket; socket_errors = []
for e in ['error', 'gaierror']:
    if hasattr(socket, e): socket_errors.append(getattr(socket, e))
#DEPRECATED import mimify
#DEPRECATED from StringIO import StringIO as SIO
#DEPRECATED mimify.CHARSET = 'utf-8'
import feedparser
feedparser.USER_AGENT = "rss2email/"+__version__+ " +http://www.allthingsrss.com/rss2email/"
import html2text as h2t
h2t.UNICODE_SNOB = UNICODE_SNOB
h2t.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH
h2t.BODY_WIDTH = BODY_WIDTH
html2text = h2t.html2text
from types import *
### Utility Functions ###
import threading
class TimeoutError(Exception): pass
class InputError(Exception): pass
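
# The module-level flags set on h2t above come from the old script-style
# html2text API. A rough equivalent with the current class-based API might
# look like the sketch below (attribute names per recent html2text releases;
# the sample HTML is a placeholder).
import html2text

converter = html2text.HTML2Text()
converter.unicode_snob = True   # keep unicode characters instead of ASCII substitutes
converter.body_width = 0        # 0 disables line wrapping
print(converter.handle("<p>Hello, <b>feed</b> world!</p>"))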
# pipefetch.py
#
try:
    import speedparser as feedparser
except ImportError:
    import feedparser
    feedparser.USER_AGENT = (
        "pipe2py (feedparser/%s) +https://github.com/ggaughan/pipe2py" %
        feedparser.__version__
    )

from urllib2 import urlopen

from pipe2py.lib.dotdict import DotDict
from pipe2py import util


def pipe_fetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetches and parses one or more feeds to yield the feed entries.

    Keyword arguments:
    context -- pipeline context
    _INPUT -- not used
    conf:
# pipefetchsitefeed.py
#
try:
    import speedparser as feedparser
except ImportError:
    import feedparser
    feedparser.USER_AGENT = (
        "pipe2py (feedparser/%s) +https://github.com/ggaughan/pipe2py" %
        feedparser.__version__
    )

from urllib2 import urlopen

from pipe2py.lib import autorss
from pipe2py import util
from pipe2py.lib.dotdict import DotDict


def pipe_fetchsitefeed(context=None, _INPUT=None, conf=None, **kwargs):
    """This source fetches and parses the first feed found on one or more sites
    to yield the feed entries.

    Keyword arguments:
    context -- pipeline context
except ImportError:
    import Queue as queue
try:
    import urllib.parse as urlparse
except ImportError:
    import urlparse
try:
    import html.parser as HTMLParser
except ImportError:
    import HTMLParser
from . import param, normalize, util, transform, filters, dbop
import imp
#socket.setdefaulttimeout(10)
feedparser.USER_AGENT = param.user_agent
class ParseError(Exception):
    pass

class AutodiscoveryParseError(Exception):
    pass

class FeedAlreadyExists(Exception):
    pass

class UnknownError(Exception):
    pass

ratings = [
    ('all', 'all', 'All articles', 'item_rating is not null'),
    ('unread', 'unread', 'Unread only', 'item_rating = 0'),
    ('down', 'uninteresting', 'Uninteresting only', 'item_rating = -1'),
    ('up', 'interesting', 'Interesting only', 'item_rating > 0'),
    ('filtered', 'filtered', 'Filtered only', 'item_rating = -2')
"""
# Setup URL parameters
args = {}
args['q'] = short_name
args['output'] = 'rss'
args['startdate'] = start_date
args['enddate'] = end_date
args['start'] = 0 # pagination (start from item number <'start'> ...)
args['num'] = 100 # pagination (... and show the next <'num'> items.)
# Initialize variables
reached_end_of_feed = False
all_feed_items = []
# Fetch feed items until end of feed
feedparser.USER_AGENT = self.USER_AGENT
while not reached_end_of_feed:
feed_url = 'https://www.google.com/finance/company_news?' \
+ urllib.urlencode(args)
feed = feedparser.parse(feed_url)
all_feed_items += feed['items']
reached_end_of_feed = len(feed['items']) != args['num']
# Prepare for requesting next page
args['start'] += args['num']
return all_feed_items
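
# The loop above uses the Python 2 `urllib.urlencode`. Under Python 3 the same
# URL construction step would use urllib.parse.urlencode, roughly as in the
# sketch below (the query values here are placeholders).
from urllib.parse import urlencode

args = {'q': 'GOOG', 'output': 'rss', 'startdate': '2017-01-01',
        'enddate': '2017-12-31', 'start': 0, 'num': 100}
feed_url = 'https://www.google.com/finance/company_news?' + urlencode(args)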