def test_galaxy10(self):
    # make sure galaxy10 exists on Bovy's server
    r = requests.head(_G10_ORIGIN, allow_redirects=True)
    self.assertEqual(r.status_code, 200)
    r.close()
    # index 0 is a valid class; 11 is out of range for the 10-class scheme
    galaxy10cls_lookup(0)
    self.assertRaises(ValueError, galaxy10cls_lookup, 11)
    # an all-ones 10x10 matrix is enough to exercise the confusion plot
    galaxy10_confusion(np.ones((10, 10)))
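A minimal sketch of the availability check the test leans on, factored into a reusable helper with a timeout; the name remote_file_exists and the timeout value are assumptions, not part of the test suite.

import requests

def remote_file_exists(url, timeout=10):
    # HEAD with redirects mirrors the check above without fetching the body
    try:
        r = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.RequestException:
        return False
    r.close()
    return r.status_code == 200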
def filename(self):
    """
    Returns the name of the output file. This helper relies on network
    connectivity.
    """
    r = requests.head(self.url, allow_redirects=True)
    if r.status_code != 200:
        raise RuntimeError('%s on %s' % (r.status_code, self.url))
    value = r.headers.get('Last-Modified')
    if value is None:
        raise RuntimeError('missing Last-Modified header')
    parsed_date = eut.parsedate(value)
    if parsed_date is None:
        raise RuntimeError('could not parse Last-Modified header')
    last_modified_date = datetime.date(*parsed_date[:3])
    # key the name on the URL hash plus the remote modification date,
    # so a changed upstream file yields a new output file
    digest = hashlib.sha1(six.b(self.url)).hexdigest()
    return '%s-%s.file' % (digest, last_modified_date.isoformat())
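On Python 3 the same parse can be done with email.utils.parsedate_to_datetime from the standard library; a sketch of the header check as a standalone helper (the function name is an assumption):

from email.utils import parsedate_to_datetime

import requests

def last_modified_date(url):
    r = requests.head(url, allow_redirects=True)
    r.raise_for_status()  # same effect as the manual status check above
    value = r.headers.get('Last-Modified')
    if value is None:
        raise RuntimeError('missing Last-Modified header')
    return parsedate_to_datetime(value).date()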
def picture_url_validator(node, value):
    """Validate picture url."""
    try:
        resp = requests.head(value, timeout=2)
    except ConnectionError:
        msg = 'Connection failed for {}'.format(value)
        raise Invalid(node, msg)
    if resp.status_code != 200:
        msg = 'Connection failed, status is {} instead of 200'
        raise Invalid(node, msg.format(resp.status_code))
    mimetype = resp.headers.get('Content-Type', '')
    image_mime_type_validator(node, mimetype)
    # HEAD lets us check the size without downloading the asset
    size = int(resp.headers.get('Content-Length', '0'))
    if size > FileStoreType.SIZE_LIMIT:
        msg = 'Asset too large: {} bytes'.format(size)
        raise Invalid(node, msg)
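Servers may omit Content-Length entirely (e.g. chunked transfer), in which case the '0' default above lets any asset through. A hedged sketch of a stricter probe; the helper name is hypothetical:

import requests

def head_content_length(url, timeout=2):
    """Return the advertised size in bytes, or None if the server
    does not send Content-Length, so the caller can decide whether
    an unknown size should fail validation."""
    resp = requests.head(url, timeout=timeout, allow_redirects=True)
    resp.raise_for_status()
    value = resp.headers.get('Content-Length')
    return int(value) if value is not None else None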
"""
Given a URL, look for the corresponding dataset in the local cache.
If it's not there, download it. Then return the path to the cached file.
"""
if cache_dir is None:
cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
if isinstance(cache_dir, Path):
cache_dir = str(cache_dir)
os.makedirs(cache_dir, exist_ok=True)
# Get eTag to add to filename, if it exists.
if url.startswith("s3://"):
etag = s3_etag(url)
else:
response = requests.head(url, allow_redirects=True)
if response.status_code != 200:
raise IOError("HEAD request failed for url {} with status code {}"
.format(url, response.status_code))
etag = response.headers.get("ETag")
filename = url_to_filename(url, etag)
# get cache path to put the file
cache_path = os.path.join(cache_dir, filename)
if not os.path.exists(cache_path):
# Download to temporary file, then copy to cache dir once finished.
# Otherwise you get corrupt cache entries if the download gets interrupted.
with tempfile.NamedTemporaryFile() as temp_file:
logger.info("%s not found in cache, downloading to %s", url, temp_file.name)
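The fragment keys the cache on both the URL and the ETag, so a changed remote file lands in a fresh cache entry. A plausible url_to_filename, sketched from that usage rather than copied from the library:

import hashlib

def url_to_filename(url, etag=None):
    # hash the URL for a filesystem-safe name; fold in the ETag so the
    # key changes whenever the server reports new content
    filename = hashlib.sha256(url.encode('utf-8')).hexdigest()
    if etag:
        filename += '.' + hashlib.sha256(etag.encode('utf-8')).hexdigest()
    return filename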
def get_configs(etag=None):
    try:
        head = requests.head(OVPN_ADDR, timeout=TIMEOUT)
        # Follow the redirect if there is one
        if head.status_code in STATUS_REDIRECT:
            redirect_url = head.headers['Location']
            head = requests.head(redirect_url, timeout=TIMEOUT)
        if head.status_code in STATUS_SUCCESS:
            header_etag = head.headers['etag']
            if header_etag != etag:
                # ETag changed upstream: fetch the new configs
                resp = requests.get(OVPN_ADDR, timeout=TIMEOUT)
                if resp.status_code in STATUS_SUCCESS:
                    return (resp.content, header_etag)
            else:
                # unchanged: nothing to download
                return (None, None)
        else:
            return False
    except Exception as ex:
        print(ex)
        return False
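The HEAD-then-GET dance can be collapsed into a single conditional GET: sending If-None-Match makes the server answer 304 when the ETag still matches. A sketch under the same OVPN_ADDR/TIMEOUT names; get_configs_conditional is hypothetical:

import requests

def get_configs_conditional(etag=None):
    headers = {'If-None-Match': etag} if etag else {}
    resp = requests.get(OVPN_ADDR, headers=headers, timeout=TIMEOUT)
    if resp.status_code == 304:
        return (None, None)  # unchanged since the cached etag
    resp.raise_for_status()
    return (resp.content, resp.headers.get('ETag'))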
def update_download_info(client, record):
    rid = record["data"]["id"]
    dlinfo = dict(record["data"]["download"])
    resp = requests.head(dlinfo["url"])
    dlinfo["size"] = int(resp.headers["Content-Length"])
    dlinfo["mimetype"] = resp.headers["Content-Type"]
    # XXX: use JSON-merge header.
    client.patch_record(data={"download": dlinfo}, id=rid)
def crawl_record(self, source, date):
    source_url = BASE_URL % date.strftime('%Y%m%d')
    if self.foreign_id_exists(source, source_url):
        # assuming they're immutable
        return
    res = requests.head(source_url)
    if res.status_code != 200:
        return
    meta = self.metadata()
    date_str = date.strftime('%d.%m.%Y')
    meta.title = 'Liechtenstein Kundmachungen %s' % date_str
    meta.languages = ['de']
    meta.add_country('li')
    meta.extension = 'pdf'
    meta.add_date(date)
    meta.mime_type = 'application/pdf'
    meta.foreign_id = source_url
    self.emit_url(source, meta, source_url)
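The crawler addresses gazettes purely by date, so the HEAD probe doubles as an existence check. A sketch of that probe in isolation (kundmachung_exists is hypothetical; BASE_URL is the snippet's own constant):

import datetime

import requests

def kundmachung_exists(date):
    # a 404 on the HEAD means nothing was published that day
    url = BASE_URL % date.strftime('%Y%m%d')
    return requests.head(url).status_code == 200

# e.g. kundmachung_exists(datetime.date(2017, 1, 3))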
if num_threads > 1:
    return download_file_parallel(
        url,
        target_path,
        num_threads=num_threads
    )
r = requests.get(url, stream=True)
if r.status_code != 200:
    return (url, False, 'Failed to download file {} (status {})!'.format(
        url,
        r.status_code
    ))
file_size = int(requests.head(url).headers['Content-Length'])
bytes_loaded = 0
bytes_since_last_log = 0
logger.info('Download file from "%s" with size: %d B', url, file_size)
with open(target_path, 'wb') as f:
    for chunk in r.iter_content(chunk_size=1024):
        if chunk:
            f.write(chunk)
            # count the bytes actually written; the final chunk is
            # usually shorter than 1024
            bytes_loaded += len(chunk)
            bytes_since_last_log += len(chunk)
            if bytes_since_last_log >= PROGRESS_LOGGER_BYTE_DELAY:
                logger.info('Download [%06.2f%%]', bytes_loaded / file_size * 100)
                bytes_since_last_log = 0
logger.info('Finished download')
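The extra HEAD just for the size is avoidable: the streaming GET response already carries Content-Length. A sketch of the same loop without the second request; the function name and chunk size are placeholders:

import logging

import requests

logger = logging.getLogger(__name__)

def download(url, target_path, chunk_size=1024):
    r = requests.get(url, stream=True)
    r.raise_for_status()
    total = int(r.headers.get('Content-Length', 0))
    loaded = 0
    with open(target_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=chunk_size):
            f.write(chunk)
            loaded += len(chunk)
            if total:
                logger.info('Download [%06.2f%%]', loaded / total * 100)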
try:
    log_info("* Checking If-Modified-Since", 2)
    with request_manager.normal_request():
        res = requests.get(
            const_download_url().format(ext_id),
            stream=True,
            headers=headers,
            timeout=10)
    log_info("* crx archive (Last: {}): {}".format(
        value_of(last_crx_http_date, "n/a"), str(res.status_code)), 2)
    extfilename = os.path.basename(res.url)
    if re.search('&', extfilename):
        extfilename = "default.crx"
    if res.status_code == 304:
        # not modified since last_crx_http_date; compare ETags before
        # deciding whether to re-download anyway
        with request_manager.normal_request():
            etag = requests.head(
                const_download_url().format(ext_id),
                timeout=10,
                allow_redirects=True).headers.get('ETag')
        write_text(tmptardir, date, extfilename + ".etag", etag)
        log_info("- checking etag, last: {}".format(last_crx_etag), 3)
        log_info("    current: {}".format(etag), 3)
        # skip when the ETag is missing or unchanged
        if etag and etag != last_crx_etag:
            log_info("- downloading due to different etags", 3)
            with request_manager.normal_request():
                res = requests.get(
                    const_download_url().format(ext_id),
                    stream=True,
                    timeout=10)
    else: