How to use the cachecontrol.CacheControl function in CacheControl

To help you get started, we’ve selected a few CacheControl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sgraaf / Replicate-Toronto-BookCorpus / src / download_books.py View on Github external
# load book_download_urls
    book_download_urls = read(root_dir / 'book_download_urls.txt').splitlines()

    # remove any books that have already been downloaded
    book_download_urls = [url for url in book_download_urls if not (data_dir / f'{get_book_id(url)}.txt').exists()]

    if book_download_urls:
        # keep only the first 500 (as smashwords blocks the IP-address after 500 requests)
        book_download_urls = book_download_urls[:500]

        # get headers (user-agents)
        headers = get_headers(root_dir / 'user-agents.txt')

        # initialize cache-controlled session
        session = CacheControl(Session())

        # get proxies
        proxies = get_free_proxies(session=session, headers=headers[0])

        # get the books (concurrently)
        with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            for nb_retry in count(1):
                # break if all book_download_urls successful
                if not book_download_urls:
                    break

                # break if max number of retries exceeded
                if nb_retry > NB_RETRIES:
                    print(f'Could not download {len(book_download_urls)} books after {NB_RETRIES} retries.')
                    break
github GISupportICRC / ArcGIS2Mapbox / arc2mb.py View on Github external
def __init__(self, access_token=None, cache=None):
        """Construct a Service object backed by a ``requests`` session.

        The session carries ``access_token`` as a default query parameter
        and a ``mapbox-sdk-py`` User-Agent header.

        :param access_token: token stored in the session's default query
            parameters under the ``access_token`` key.
        :param cache: optional cache object; when truthy, the session is
            wrapped with ``CacheControl`` using it as the cache backend.
        """
        user_agent = "mapbox-sdk-py/{0} {1}".format(
            "0.8.0", requests.utils.default_user_agent())

        session = requests.Session()
        session.params.update(access_token=access_token)
        session.headers.update({"User-Agent": user_agent})
        self.session = session

        # Caching is opt-in: only wrap the session when a cache was supplied.
        if cache:
            self.session = CacheControl(session, cache=cache)
github sgraaf / papermap / papermap / papermap.py View on Github external
def download_tiles(self):
        # initialize a cache-controlled requests Session object and set the headers
        self.session = CacheControl(requests.Session())
        self.session.headers = HEADERS

        # download the tile images
        for nb_retry in count(1):
            # get the unsuccessful tiles
            tiles = [tile for tile in self.tiles if not tile.success]

            # break if all tiles successful
            if not tiles:
                break

            # break if max number of retries exceeded
            if nb_retry > self.nb_retries:
                if not self.quiet_mode:
                    raise RuntimeError(
                        f'Could not download {len(tiles)}/{len(self.tiles)} tiles after {self.nb_retries} retries.')
github opentargets / opentargets-py / opentargets / conn.py View on Github external
self._logger = logging.getLogger(__name__)
        self.host = host
        self.port = str(port)
        self.api_version = api_version
        session= requests.Session()
        session.verify = verify
        session.proxies = proxies
        session.auth = auth
        retry_policies = Retry(total=10,
                               read=10,
                               connect=10,
                               backoff_factor=.5,
                               status_forcelist=(500, 502, 504),)
        http_retry = HTTPAdapter(max_retries=retry_policies)
        session.mount(host, http_retry)
        self.session = CacheControl(session)
        self._get_remote_api_specs()
github conda / conda-build / conda_build / skeletons / cran.py View on Github external
def get_session(output_dir, verbose=True):
    """Return a ``requests`` session, HTTP-cached when CacheControl is available.

    :param output_dir: directory under which the ``.web_cache`` file cache
        is placed.
    :param verbose: when true, print an install tip if CacheControl cannot
        be imported.
    :returns: a CacheControl-wrapped session if the ``cachecontrol`` package
        imports successfully, otherwise a plain ``requests.Session``.
    """
    plain_session = requests.Session()

    # CacheControl is an optional dependency; fall back to the plain session.
    try:
        import cachecontrol
        import cachecontrol.caches
    except ImportError:
        if verbose:
            print("Tip: install CacheControl (conda package) to cache the CRAN metadata")
        return plain_session

    web_cache = cachecontrol.caches.FileCache(join(output_dir, '.web_cache'))
    return cachecontrol.CacheControl(plain_session, cache=web_cache)
github conda / conda-build / conda_build / cran.py View on Github external
def get_session(output_dir, verbose=True, cache=[]):
    """Return a memoized ``requests`` session, HTTP-cached when possible.

    The first call builds the session and appends it to ``cache``; every
    later call returns ``cache[0]`` unchanged.

    :param output_dir: directory under which the ``.web_cache`` file cache
        is placed. Only consulted when a new session has to be built, so a
        different ``output_dir`` on a later call has no effect.
    :param verbose: when true, print an install tip if CacheControl cannot
        be imported.
    :param cache: list used as the memo store. The mutable default is
        deliberate: it is shared across calls so the session is created
        once per process.
    :returns: the memoized session (CacheControl-wrapped if the
        ``cachecontrol`` package imports, otherwise a plain session).
    """
    if not cache:
        new_session = requests.Session()
        # CacheControl is an optional dependency; keep the plain session
        # when it is not installed.
        try:
            import cachecontrol
            import cachecontrol.caches
        except ImportError:
            if verbose:
                print("Tip: install CacheControl to cache the CRAN metadata")
        else:
            web_cache = cachecontrol.caches.FileCache(
                join(output_dir, '.web_cache'))
            new_session = cachecontrol.CacheControl(new_session,
                                                    cache=web_cache)
        cache.append(new_session)

    return cache[0]
github kibitzr / kibitzr / kibitzr / fetcher / simple.py View on Github external
def __init__(self, conf):
        """Set up the fetcher from a configuration mapping.

        :param conf: configuration; must contain ``'url'`` and may contain
            ``'valid_http'``, ``'verify-cert'`` or ``'verify_cert'``.
            (presumably a dict -- it is accessed via ``[]`` and ``.get``)
        """
        self.conf = conf
        # All requests go through a CacheControl-wrapped requests session.
        self.session = CacheControl(requests.Session())
        # Identify the client with its version in the User-agent header.
        self.session.headers.update({
            'User-agent': 'Kibitzr/' + version,
        })
        self.url = conf['url']
        # HTTP status codes stored as a set; defaults to only 200.
        self.valid_http = set(conf.get('valid_http', [200]))
        # Both spellings of the key are accepted; the hyphenated one takes
        # precedence, and the value defaults to True.
        self.verify_cert = conf.get('verify-cert', conf.get('verify_cert', True))