Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_corpus(self):
    """Check the corpus accessors and the download/remove round trip."""
    # Each accessor below must return a frozenset.
    accessors = (
        thai_negations,
        thai_stopwords,
        thai_syllables,
        thai_words,
        countries,
        provinces,
        thai_female_names,
        thai_male_names,
    )
    for accessor in accessors:
        self.assertIsInstance(accessor(), frozenset)

    # corpus does not exist
    unknown_detail = get_corpus_db_detail("XXX")
    self.assertEqual(unknown_detail, {})

    # download the first time
    first_fetch = download("test")
    self.assertTrue(first_fetch)
    # force download
    forced_fetch = download(name="test", force=True)
    self.assertTrue(forced_fetch)
    # try download existing
    repeat_fetch = download(name="test")
    self.assertTrue(repeat_fetch)

    # URL not exist
    bad_url_fetch = download(name="test", url="wrongurl")
    self.assertFalse(bad_url_fetch)
    # corpus name not exist
    bad_name_fetch = download(name="XxxXXxxx817d37sf")
    self.assertFalse(bad_name_fetch)

    # corpus exists
    self.assertIsNotNone(get_corpus_db_detail("test"))

    # remove existing, then a second removal must report failure
    self.assertTrue(remove("test"))
    self.assertFalse(remove("test"))

    # Request an explicit version, then clean up again.
    self.assertTrue(download(name="test", version="0.1"))
    self.assertTrue(remove("test"))
def _get_path(fname: str) -> str:
    """
    Look up the path of a pythainlp-corpus file, downloading it if needed.

    :param str fname: file name
    :return: path to downloaded file
    """
    located = get_corpus_path(fname)
    if located:
        return located
    # First lookup was falsy: trigger a download and look the path up again.
    download(fname)
    return get_corpus_path(fname)
def get_path(fname):
    """
    Return the path reported by ``get_file`` for ``fname``.

    When the first lookup is falsy, ``download(fname)`` is called and the
    lookup is repeated; the result of that second lookup is returned as-is.
    """
    found = get_file(fname)
    if found:
        return found
    download(fname)
    return get_file(fname)
def download(args):
    """Pass ``args.name`` to ``corpus.download``; nothing is returned."""
    corpus_name = args.name
    corpus.download(corpus_name)
def download_model()->None:
"""
Download Model

For each of ``scb_1m_th-en_newmm``, ``scb_1m_th-en_spm`` and
``scb_1m_en-th_moses``: when ``get_corpus_path`` returns ``None`` the
corpus is downloaded (``force=True``, ``version="1.0"``). Each archive
is opened as a gzip tar and extracted with no target path, i.e. into
the current working directory (the ``extractall`` default).

After printing "Install model...", the directories given by
``get_full_data_path`` are created when missing, and the ``newmm``
archive is extracted into its directory.

NOTE(review): leading indentation is missing in this copy, so which
statements belong inside each ``if`` cannot be confirmed from here.
"""
# --- th-en newmm archive: download when no corpus path is known, unpack ---
if get_corpus_path("scb_1m_th-en_newmm") is None:
download("scb_1m_th-en_newmm", force=True, version="1.0")
# NOTE(review): presumably still under the ``if`` above -- confirm.
tar = tarfile.open(get_corpus_path("scb_1m_th-en_newmm"), "r:gz")
tar.extractall()
# Closed manually; the handle is not released if extractall() raises.
tar.close()
# --- th-en spm archive: same steps as above ---
if get_corpus_path("scb_1m_th-en_spm") is None:
download("scb_1m_th-en_spm", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_th-en_spm"), "r:gz")
tar.extractall()
tar.close()
# --- en-th moses archive: same steps as above ---
if get_corpus_path("scb_1m_en-th_moses") is None:
download("scb_1m_en-th_moses", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_en-th_moses"), "r:gz")
tar.extractall()
tar.close()
print("Install model...")
# Create the newmm data directory if it does not exist, then extract the
# archive into it (compression is auto-detected: no mode is passed here).
if not os.path.exists(get_full_data_path("scb_1m_th-en_newmm")):
os.mkdir(get_full_data_path("scb_1m_th-en_newmm"))
with tarfile.open(get_corpus_path("scb_1m_th-en_newmm")) as tar:
tar.extractall(path=get_full_data_path("scb_1m_th-en_newmm"))
# NOTE(review): only the spm directory is created below; no extraction for
# spm and no install step for moses are visible -- the listing may be cut
# off here. Confirm against the full file.
if not os.path.exists(get_full_data_path("scb_1m_th-en_spm")):
os.mkdir(get_full_data_path("scb_1m_th-en_spm"))
def _download() -> str:
    """
    Return the corpus path of ``thai2fit_wv``.

    If the initial ``get_corpus_path`` lookup is falsy, ``download_data`` is
    invoked for the corpus and the path is looked up a second time.
    """
    corpus_name = "thai2fit_wv"
    located = get_corpus_path(corpus_name)
    if located:
        return located
    download_data(corpus_name)
    return get_corpus_path(corpus_name)
def get_path(fname):
    """
    Resolve ``fname`` through ``get_file``, downloading on a falsy lookup.

    The value returned is whatever ``get_file`` reports: the first lookup
    when it is truthy, otherwise the lookup made after ``download(fname)``.
    """
    resolved = get_file(fname)
    if resolved:
        return resolved
    # Nothing truthy came back: download, then ask get_file once more.
    download(fname)
    return get_file(fname)