Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_fails_to_make_directory(self, f: Callable):
f.side_effect = OSError()
with self.assertRaises(OSError):
download.cached_download('https://example.com')
def test_cached_download(self, f: Callable):
def urlretrieve(url, path):
with open(path, 'w') as f:
f.write('test')
f.side_effect = urlretrieve
cache_path = download.cached_download('https://example.com')
self.assertEqual(f.call_count, 1)
args, kwargs = f.call_args
self.assertEqual(kwargs, {})
self.assertEqual(len(args), 2)
# The second argument is a temporary path, and it is removed
self.assertEqual(args[0], 'https://example.com')
self.assertTrue(os.path.exists(cache_path))
with open(cache_path) as f:
stored_data = f.read()
self.assertEqual(stored_data, 'test')
def test_cache_exists(self, f: Callable):
f.return_value = True
url = 'https://example.com'
path = download.cached_download(url)
self.assertEqual(path, os.path.join(self.temp_dir, '_dl_cache', hashlib.md5(url.encode('utf-8')).hexdigest()))
def test_file_exists(self):
# Make an empty file which has the same name as the cache directory
with open(os.path.join(self.temp_dir, '_dl_cache'), 'w'):
pass
with self.assertRaises(OSError):
download.cached_download('https://example.com')
def get_word2vec(lang: str = "en"):
# Download.
urls = {
"en": "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz",
"ja": "http://public.shiroyagi.s3.amazonaws.com/latest-ja-word2vec-gensim-model.zip"
}
path = download.cached_download(urls[lang])
path = Path(path)
filename = "word2vec.gensim.model"
print("Loading model...")
if lang == "ja":
dirpath = Path(download.get_cache_directory(str(Path("word2vec"))))
download.cached_unzip(path, dirpath / lang)
model_path = dirpath / lang / filename
model = gensim.models.Word2Vec.load(str(model_path))
if lang == "en":
dirpath = Path(download.get_cache_directory(str(Path("word2vec") / "en")))
model_path = dirpath / filename
download.cached_decompress_gzip(path, model_path)
def get_fasttext(lang: str = "en"):
# Download.
urls = {
"en": "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.simple.zip",
"ja": "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.ja.zip",
"fr": "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.fr.zip",
}
path = download.cached_download(urls[lang])
path = Path(path)
dirpath = path.parent / 'fasttext' / lang
download.cached_unzip(path, dirpath)
print("Loading model...")
filename = Path(urls[lang]).stem + '.bin'
model = load_model(str(dirpath / filename))
return model