                _log.debug('sleeping for %s seconds before retry' % wait)
                if exception_raised:
                    _log.warning('got %s sleeping for %s seconds before retry',
                                 exception_raised, wait)
                else:
                    _log.warning('sleeping for %s seconds before retry', wait)
                time.sleep(wait)

        # out of the loop, either an exception was raised or we had a success
        if exception_raised:
            raise exception_raised
        return resp
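
# The lines above are the tail of RetrySession's retry loop. As a rough,
# self-contained illustration of the same retry-with-exponential-backoff
# pattern (a hypothetical standalone helper, not scrapelib's actual code):

import time

import requests


def get_with_retries(url, retry_attempts=3, retry_wait_seconds=5):
    """Fetch ``url``, retrying on connection errors, timeouts, and 5xx responses."""
    resp = None
    exception_raised = None
    for tries in range(retry_attempts + 1):
        exception_raised = None
        try:
            resp = requests.get(url, timeout=10)
            if resp.status_code < 500:
                return resp  # accepted response, stop retrying
        except (requests.ConnectionError, requests.Timeout) as e:
            exception_raised = e
        if tries < retry_attempts:
            # wait twice as long before each successive attempt
            time.sleep(retry_wait_seconds * (2 ** tries))
    if exception_raised:
        raise exception_raised
    return resp
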
# compose sessions, order matters (cache then throttle then retry)
class Scraper(CachingSession, ThrottledSession, RetrySession):
    """
    Scraper is the most important class provided by scrapelib (and generally
    the only one to be instantiated directly). It provides a large number
    of options allowing for customization.

    Usage is generally just creating an instance with the desired options and
    then using the :meth:`urlopen` & :meth:`urlretrieve` methods of that
    instance.

    :param raise_errors: set to True to raise a :class:`HTTPError`
        on 4xx or 5xx response
    :param requests_per_minute: maximum requests per minute (0 for
        unlimited, defaults to 60)
    :param retry_attempts: number of times to retry if timeout occurs or
        page returns a (non-404) error
    :param retry_wait_seconds: number of seconds to retry after first failure,
        doubled with each subsequent retry
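
# A minimal usage sketch based on the constructor parameters documented
# above (the URL and values here are illustrative only): create one Scraper
# and reuse it so that caching, throttling, and retries apply to every request.
s = Scraper(requests_per_minute=60,
            retry_attempts=2,
            retry_wait_seconds=5,
            raise_errors=True)
# each request flows through the composed sessions: cache, then throttle,
# then retry (the base-class order of Scraper above)
html = s.urlopen('https://example.com')
s.urlretrieve('https://example.com/data.csv', 'data.csv')
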
        # no caching in play: pass the request straight through and mark
        # the response as uncached
        resp = super(CachingSession, self).request(method, url, **kwargs)
        resp.fromcache = False
        return resp
        resp = None
        method = method.lower()
        request_key = self.key_for_request(method, url, **kwargs)

        # check the cache for an existing response unless we're write-only
        if request_key and not self.cache_write_only:
            resp = self.cache_storage.get(request_key)

        if resp:
            resp.fromcache = True
        else:
            resp = super(CachingSession, self).request(method, url, **kwargs)
            # save to cache if request and response meet criteria
            if request_key and self.should_cache_response(resp):
                self.cache_storage.set(request_key, resp)
            resp.fromcache = False

        return resp
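
# The caching logic above only needs a storage object with ``get(key)`` and
# ``set(key, response)`` methods. A minimal in-memory backend satisfying that
# interface might look like this (a hypothetical sketch; scrapelib ships its
# own storage backends):

class DictCache(object):
    """Toy cache storage: keeps responses in a plain dict, never expires."""

    def __init__(self):
        self._responses = {}

    def get(self, key):
        # return None on a miss, matching how the request() code above
        # treats a falsy value as "not cached"
        return self._responses.get(key)

    def set(self, key, response):
        self._responses[key] = response

# usage (assuming a Scraper instance ``s`` as sketched earlier):
#     s.cache_storage = DictCache()
#     s.cache_write_only = False   # allow cache reads, not just writes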