import os

import gdown

# IS_TEST, the *_URL constants, and the output paths are module-level
# settings defined elsewhere in the project.
def maybe_download_files(data_dir: str = "data") -> None:
    os.makedirs(data_dir, exist_ok=True)
    if IS_TEST:
        # Sample data pickle
        gdown.download(SMALL_DATA_URL, output=SAMPLE_DATA, quiet=False)
    else:
        # Books
        gdown.download(YA_BOOKS_URL, output=BOOK_DATA, quiet=False)
        # Interactions
        gdown.download(YA_INTERACTIONS_URL, output=INTERACTIONS_DATA, quiet=False)
        # Reviews
        gdown.download(YA_REVIEWS_URL, output=REVIEWS_DATA, quiet=False)
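The names above come from the defining module; a hypothetical setup (all values here are placeholders, not the project's real URLs or paths) could look like:

import os

IS_TEST = os.environ.get("IS_TEST") == "1"                   # hypothetical toggle
SMALL_DATA_URL = "https://drive.google.com/uc?id=<file-id>"  # placeholder
SAMPLE_DATA = os.path.join("data", "sample.pkl")             # placeholder

maybe_download_files("data")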
import os

import gdown

def download(url, filename, cachedir='~/hashtag/'):
    f_cachedir = os.path.expanduser(cachedir)
    os.makedirs(f_cachedir, exist_ok=True)
    file_path = os.path.join(f_cachedir, filename)
    if os.path.isfile(file_path):
        print('Using cached model')
        return file_path
    gdown.download(url, file_path, quiet=False)
    return file_path
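A usage sketch (the URL is a placeholder): the first call downloads into ~/hashtag/, the second short-circuits on the cached copy:

model_path = download("https://drive.google.com/uc?id=<file-id>", "model.pt")
model_path = download("https://drive.google.com/uc?id=<file-id>", "model.pt")  # prints 'Using cached model'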
import hashlib
import os
import os.path as osp

import gdown

def cached_download(url, path, md5=None, quiet=False):
    def check_md5(path, md5):
        print('[{:s}] Checking md5 ({:s})'.format(path, md5))
        return md5sum(path) == md5

    if osp.exists(path) and not md5:
        print('[{:s}] File exists ({:s})'.format(path, md5sum(path)))
        return path
    elif osp.exists(path) and md5 and check_md5(path, md5):
        return path
    else:
        dirpath = osp.dirname(path)
        if not osp.exists(dirpath):
            os.makedirs(dirpath)
        return gdown.download(url, path, quiet=quiet)

def md5sum(filename, blocksize=65536):
    # Hash the file in fixed-size blocks so large downloads never have to
    # fit in memory; cached_download compares the resulting hex digest.
    hash_ = hashlib.md5()
    with open(filename, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            hash_.update(block)
    return hash_.hexdigest()
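With the checksum in place, a call looks like this (URL and digest are placeholders; the md5 string must be the hex digest of the expected file):

weights = cached_download(
    "https://drive.google.com/uc?id=<file-id>",  # placeholder URL
    "data/weights.pth",
    md5="<expected-md5-hex-digest>",             # placeholder checksum
)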
# A variant of cached_download with a postprocess hook; it reuses the
# imports and the md5sum helper shown above.
def cached_download(url, path, md5=None, quiet=False, postprocess=None):
    def check_md5(path, md5):
        print('[{:s}] Checking md5 ({:s})'.format(path, md5))
        return md5sum(path) == md5

    if osp.exists(path) and not md5:
        print('[{:s}] File exists ({:s})'.format(path, md5sum(path)))
    elif osp.exists(path) and md5 and check_md5(path, md5):
        pass
    else:
        dirpath = osp.dirname(path)
        if not osp.exists(dirpath):
            os.makedirs(dirpath)
        gdown.download(url, path, quiet=quiet)
    if postprocess is not None:
        postprocess(path)
    return path
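Note that postprocess runs on the path whether the file was freshly downloaded or already cached. A sketch that uses the hook to unpack an archive (the URL is a placeholder):

import zipfile

def unzip_next_to_archive(path):
    # Hypothetical postprocess: extract into the archive's directory.
    with zipfile.ZipFile(path, "r") as zf:
        zf.extractall(osp.dirname(path))

cached_download("https://drive.google.com/uc?id=<file-id>", "data/archive.zip",
                postprocess=unzip_next_to_archive)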
import errno
import os
from collections import OrderedDict

import gdown
import torch

# Fragment of a pretrained-weight loader: `model`, `key`, `pretrained_urls`,
# and `_get_torch_home()` are assumed to be defined by the surrounding module.
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
    os.makedirs(model_dir)
except OSError as e:
    if e.errno == errno.EEXIST:
        # Directory already exists, ignore.
        pass
    else:
        # Unexpected OSError, re-raise.
        raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
    gdown.download(pretrained_urls[key], cached_file, quiet=False)
state_dict = torch.load(cached_file)
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
    if k.startswith('module.'):
        k = k[7:]  # discard the 'module.' prefix added by nn.DataParallel
    if k in model_dict and model_dict[k].size() == v.size():
        new_state_dict[k] = v
        matched_layers.append(k)
    else:
        discarded_layers.append(k)
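The snippet ends before the matched weights are applied; a typical continuation for this pattern (an assumption on our part, not shown in the listing) merges them back and loads the result:

model_dict.update(new_state_dict)  # unmatched entries keep their initialized values
model.load_state_dict(model_dict)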
import os
import zipfile

def download_data_gdown(path):
    import gdown

    file_id = "1efHsY16pxK0lBD2gYCgCTnv1Swstq771"
    url = f"https://drive.google.com/uc?id={file_id}"
    data_zip = os.path.join(path, "data.zip")
    gdown.download(url, data_zip, quiet=False)
    with zipfile.ZipFile(data_zip, "r") as zip_ref:
        zip_ref.extractall(path)
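The uc?id= form gives gdown a direct download URL. It can also resolve full share links when fuzzy=True is passed (available in gdown 4.x; the link below is a placeholder):

import gdown

gdown.download("https://drive.google.com/file/d/<file-id>/view",
               "data.zip", quiet=False, fuzzy=True)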