How to use the requests.head function in requests

To help you get started, we've selected a few requests.head examples, based on popular ways it is used in public projects.
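requests.head(url) sends an HTTP HEAD request: the server returns the status line and response headers but no message body, which makes it a cheap way to check that a resource exists or to read metadata such as Content-Length, Content-Type, ETag, or Last-Modified. Note that, unlike requests.get, requests.head does not follow redirects by default, which is why several of the examples below pass allow_redirects=True. A minimal sketch (the URL is just a placeholder, not a real endpoint):

import requests

# HEAD returns status and headers only, so this is cheap even for large files.
# The URL below is a placeholder for illustration.
r = requests.head('https://example.com/data.zip', allow_redirects=True, timeout=5)
if r.status_code == 200:
    size = int(r.headers.get('Content-Length', '0'))
    mimetype = r.headers.get('Content-Type', '')
    print('{} bytes, {}'.format(size, mimetype))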


github henrysky / astroNN / tests / test_datasets.py
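In astroNN's test suite, a HEAD request verifies that the Galaxy10 dataset is still reachable on the remote server before the lookup and confusion-matrix helpers are exercised.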
def test_galaxy10(self):
    # make sure galaxy10 exists on Bovy's server
    r = requests.head(_G10_ORIGIN, allow_redirects=True)
    self.assertEqual(r.status_code, 200)
    r.close()

    galaxy10cls_lookup(0)
    self.assertRaises(ValueError, galaxy10cls_lookup, 11)
    galaxy10_confusion(np.ones((10, 10)))
github miku / siskin / siskin / common.py
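siskin derives a stable output filename from a HEAD request: the Last-Modified header and a SHA-1 digest of the URL identify the remote file without downloading it.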
def filename(self):
    """
    Returns the name of the output file. This helper relies on network connectivity.
    """
    r = requests.head(self.url, allow_redirects=True)
    if r.status_code != 200:
        raise RuntimeError('%s on %s' % (r.status_code, self.url))
    value = r.headers.get('Last-Modified')
    if value is None:
        raise RuntimeError('missing Last-Modified header')
    parsed_date = eut.parsedate(value)
    if parsed_date is None:
        raise RuntimeError('could not parse Last-Modified header')
    last_modified_date = datetime.date(*parsed_date[:3])
    digest = hashlib.sha1(six.b(self.url)).hexdigest()
    return '%s-%s.file' % (digest, last_modified_date.isoformat())
github liqd / adhocracy3 / src / adhocracy_core / adhocracy_core / sheets / image.py
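adhocracy3 validates a picture URL with a HEAD request on a short timeout, then checks the Content-Type and Content-Length headers before accepting the asset.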
def picture_url_validator(node, value):
    """Validate picture url."""
    try:
        resp = requests.head(value, timeout=2)
    except ConnectionError:
        msg = 'Connection to {} failed'.format(value)
        raise Invalid(node, msg)

    if resp.status_code != 200:
        msg = 'Connection failed, status is {} instead of 200'
        raise Invalid(node, msg.format(resp.status_code))

    mimetype = resp.headers.get('Content-Type', '')
    image_mime_type_validator(node, mimetype)

    size = int(resp.headers.get('Content-Length', '0'))
    if size > FileStoreType.SIZE_LIMIT:
        msg = 'Asset too large: {} bytes'.format(size)
        raise Invalid(node, msg)
github woshiyyya / DFGN-pytorch / DFGN / pytorch_pretrained_bert / file_utils.py
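pytorch_pretrained_bert uses a HEAD request to read the ETag header, which becomes part of the cache filename so that a changed remote file is downloaded again.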
"""
    Given a URL, look for the corresponding dataset in the local cache.
    If it's not there, download it. Then return the path to the cached file.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    os.makedirs(cache_dir, exist_ok=True)

    # Get eTag to add to filename, if it exists.
    if url.startswith("s3://"):
        etag = s3_etag(url)
    else:
        response = requests.head(url, allow_redirects=True)
        if response.status_code != 200:
            raise IOError("HEAD request failed for url {} with status code {}"
                          .format(url, response.status_code))
        etag = response.headers.get("ETag")

    filename = url_to_filename(url, etag)

    # get cache path to put the file
    cache_path = os.path.join(cache_dir, filename)

    if not os.path.exists(cache_path):
        # Download to temporary file, then copy to cache dir once finished.
        # Otherwise you get corrupt cache entries if the download gets interrupted.
        with tempfile.NamedTemporaryFile() as temp_file:
            logger.info("%s not found in cache, downloading to %s", url, temp_file.name)
github Chadsr / NordVPN-NetworkManager / nordnm / nordapi.py
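nordnm follows a redirect manually and compares the ETag from a HEAD request against the previously stored one; the OpenVPN configuration archive is only fetched when the ETag has changed.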
def get_configs(etag=None):
    try:
        head = requests.head(OVPN_ADDR, timeout=TIMEOUT)

        # Follow the redirect if there is one
        if head.status_code in STATUS_REDIRECT:
            redirect_url = head.headers['Location']
            head = requests.head(redirect_url, timeout=TIMEOUT)

        if head.status_code in STATUS_SUCCESS:
            header_etag = head.headers['etag']

            if header_etag != etag:
                resp = requests.get(OVPN_ADDR, timeout=TIMEOUT)
                if resp.status_code in STATUS_SUCCESS:
                    return (resp.content, header_etag)
            else:
                return (None, None)
        else:
            return False
    except Exception as ex:
        print(ex)
        return False
github mozilla-services / buildhub / jobs / listen_pulse.py
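buildhub reads the Content-Length and Content-Type of a build artifact from a HEAD response and patches them onto the record without downloading the file.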
def update_download_info(client, record):
    rid = record["data"]["id"]
    dlinfo = dict(record["data"]["download"])

    resp = requests.head(dlinfo["url"])
    dlinfo["size"] = int(resp.headers["Content-Length"])
    dlinfo["mimetype"] = resp.headers["Content-Type"]

    # XXX: use JSON-merge header.
    client.patch_record(data={"download": dlinfo}, id=rid)
github alephdata / aleph / aleph / crawlers / li_oera.py
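This aleph crawler sends a HEAD request to check that a gazette PDF exists for a given date before emitting its metadata.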
def crawl_record(self, source, date):
    source_url = BASE_URL % date.strftime('%Y%m%d')
    if self.foreign_id_exists(source, source_url):
        # assuming they're immutable
        return
    res = requests.head(source_url)
    if res.status_code != 200:
        return
    meta = self.metadata()
    date_str = date.strftime('%d.%m.%Y')
    meta.title = 'Liechtenstein Kundmachungen %s' % date_str
    meta.languages = ['de']
    meta.add_country('li')
    meta.extension = 'pdf'
    meta.add_date(date)
    meta.mime_type = 'application/pdf'
    meta.foreign_id = source_url
    self.emit_url(source, meta, source_url)
github ynop / audiomate / audiomate / utils / download.py
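audiomate fetches the file size from the Content-Length header of a HEAD response so that download progress can be logged as a percentage.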
if num_threads > 1:
        return download_file_parallel(
            url,
            target_path,
            num_threads=num_threads
        )

    r = requests.get(url, stream=True)

    if r.status_code != 200:
        return (url, False, 'Failed to download file {} (status {})!'.format(
            url,
            r.status_code
        ))

    file_size = int(requests.head(url).headers['Content-Length'])
    bytes_loaded = 0
    bytes_since_last_log = 0
    logger.info('Download file from "%s" with size: %d B', url, file_size)

    with open(target_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                # count actual bytes written; the final chunk may be < 1024
                bytes_loaded += len(chunk)
                bytes_since_last_log += len(chunk)

                if bytes_since_last_log >= PROGRESS_LOGGER_BYTE_DELAY:
                    logger.info('Download [%06.2f%%]', bytes_loaded / file_size * 100)
                    bytes_since_last_log = 0

    logger.info('Finished download')
github logicalhacking / ExtensionCrawler / ExtensionCrawler / archive.py
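ExtensionCrawler reacts to a 304 Not Modified response by retrieving the current ETag via a HEAD request and re-downloads the extension archive only when the ETag differs from the last one recorded.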
try:
        log_info("* Checking If-Modified-Since", 2)
        with request_manager.normal_request():
            res = requests.get(
                const_download_url().format(ext_id),
                stream=True,
                headers=headers,
                timeout=10)
        log_info("* crx archive (Last: {}): {}".format(value_of(last_crx_http_date, "n/a"), str(res.status_code)), 2)
        extfilename = os.path.basename(res.url)
        if re.search('&', extfilename):
            extfilename = "default.crx"

        if res.status_code == 304:
            with request_manager.normal_request():
                etag = requests.head(
                    const_download_url().format(ext_id),
                    timeout=10,
                    allow_redirects=True).headers.get('ETag')
            write_text(tmptardir, date, extfilename + ".etag", etag)
            log_info("- checking etag, last: {}".format(last_crx_etag), 3)
            log_info("              current: {}".format(etag), 3)

            if (etag is not "") and (etag != last_crx_etag):
                log_info("- downloading due to different etags", 3)

                with request_manager.normal_request():
                    res = requests.get(
                        const_download_url().format(ext_id),
                        stream=True,
                        timeout=10)
            else: