Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def download_model() -> None:
    """
    Download and install the SCB 1M translation models.

    The corpora ``scb_1m_th-en_newmm``, ``scb_1m_th-en_spm`` and
    ``scb_1m_en-th_moses`` are handled in that order, in two phases:

    1. Download: when ``get_corpus_path`` returns ``None`` for a corpus,
       it is downloaded (version ``1.0``) and its gzip-compressed tar
       archive is unpacked into the current working directory.
    2. Install: when the directory returned by ``get_full_data_path``
       does not exist yet, it is created and the archive is unpacked
       into it.

    :return: None
    """
    corpus_names = (
        "scb_1m_th-en_newmm",
        "scb_1m_th-en_spm",
        "scb_1m_en-th_moses",
    )

    for name in corpus_names:
        if get_corpus_path(name) is None:
            download(name, force=True, version="1.0")
            # The context manager closes the archive even when extraction
            # fails part-way; a manual close() would be skipped.
            # NOTE(review): extractall() runs without an extraction filter,
            # so it trusts the member paths in the archive - confirm the
            # downloaded archives are trusted.
            with tarfile.open(get_corpus_path(name), "r:gz") as tar:
                tar.extractall()

    print("Install model...")
    for name in corpus_names:
        target_dir = get_full_data_path(name)
        # Only unpack when the install directory is not there yet.
        # NOTE(review): nesting of the extraction under this check is
        # reconstructed (the original indentation was lost) - confirm.
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)
            with tarfile.open(get_corpus_path(name)) as tar:
                tar.extractall(path=target_dir)
def __init__(self):
    """
    Transliteration of Thai words (currently Thai to Latin, i.e.
    romanization).

    Loads the saved checkpoint located through
    ``get_corpus_path(_MODEL_NAME)``, restores the character lookup
    tables stored in it and builds the encoder from the stored
    encoder parameters.
    """
    # Locate the checkpoint file and deserialize it onto `device`.
    self.__model_filename = get_corpus_path(_MODEL_NAME)
    checkpoint = torch.load(self.__model_filename, map_location=device)

    # Both parameter entries are unpacked into exactly four values each.
    (
        enc_input_dim,
        enc_emb_dim,
        enc_hid_dim,
        enc_dropout,
    ) = checkpoint["encoder_params"]
    (
        dec_output_dim,
        dec_emb_dim,
        dec_hid_dim,
        dec_dropout,
    ) = checkpoint["decoder_params"]

    self._maxlength = 100

    # Lookup tables between characters and indices, for source and target.
    self._char_to_ix = checkpoint["char_to_ix"]
    self._ix_to_char = checkpoint["ix_to_char"]
    self._target_char_to_ix = checkpoint["target_char_to_ix"]
    self._ix_to_target_char = checkpoint["ix_to_target_char"]

    # Rebuild the encoder from the hyper-parameters saved in the checkpoint.
    self._encoder = Encoder(
        enc_input_dim, enc_emb_dim, enc_hid_dim, enc_dropout
    )
# Downloads the three SCB 1M corpora that are missing locally and unpacks
# their archives.
# NOTE(review): indentation has been lost in this copy, and it appears to be
# cut off - the final `with` statement below has no body.
def download_model()->None:
"""
Download Model
"""
# Download phase: a corpus is fetched only when get_corpus_path() returns
# None for it. extractall() is called without a path, so the archive is
# unpacked into the current working directory.
if get_corpus_path("scb_1m_th-en_newmm") is None:
download("scb_1m_th-en_newmm", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_th-en_newmm"), "r:gz")
tar.extractall()
tar.close()
if get_corpus_path("scb_1m_th-en_spm") is None:
download("scb_1m_th-en_spm", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_th-en_spm"), "r:gz")
tar.extractall()
tar.close()
if get_corpus_path("scb_1m_en-th_moses") is None:
download("scb_1m_en-th_moses", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_en-th_moses"), "r:gz")
tar.extractall()
tar.close()
print("Install model...")
# Install phase: when the directory from get_full_data_path() does not exist,
# it is created and the archive is unpacked into it.
# NOTE(review): presumably the `with` block is nested under the existence
# check - confirm against the properly indented source.
if not os.path.exists(get_full_data_path("scb_1m_th-en_newmm")):
os.mkdir(get_full_data_path("scb_1m_th-en_newmm"))
with tarfile.open(get_corpus_path("scb_1m_th-en_newmm")) as tar:
tar.extractall(path=get_full_data_path("scb_1m_th-en_newmm"))
if not os.path.exists(get_full_data_path("scb_1m_th-en_spm")):
os.mkdir(get_full_data_path("scb_1m_th-en_spm"))
with tarfile.open(get_corpus_path("scb_1m_th-en_spm")) as tar:
# Downloads the three SCB 1M corpora that are missing locally and unpacks
# their archives.
# NOTE(review): indentation has been lost in this copy, and it appears to be
# cut off - the final `if` statement below has no body.
def download_model()->None:
"""
Download Model
"""
# Download phase: a corpus is fetched only when get_corpus_path() returns
# None for it. extractall() is called without a path, so the archive is
# unpacked into the current working directory.
if get_corpus_path("scb_1m_th-en_newmm") is None:
download("scb_1m_th-en_newmm", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_th-en_newmm"), "r:gz")
tar.extractall()
tar.close()
if get_corpus_path("scb_1m_th-en_spm") is None:
download("scb_1m_th-en_spm", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_th-en_spm"), "r:gz")
tar.extractall()
tar.close()
if get_corpus_path("scb_1m_en-th_moses") is None:
download("scb_1m_en-th_moses", force=True, version="1.0")
tar = tarfile.open(get_corpus_path("scb_1m_en-th_moses"), "r:gz")
tar.extractall()
tar.close()
print("Install model...")
# Install phase: when the directory from get_full_data_path() does not exist,
# it is created and the archive is unpacked into it.
# NOTE(review): presumably the `with` block is nested under the existence
# check - confirm against the properly indented source.
if not os.path.exists(get_full_data_path("scb_1m_th-en_newmm")):
os.mkdir(get_full_data_path("scb_1m_th-en_newmm"))
with tarfile.open(get_corpus_path("scb_1m_th-en_newmm")) as tar:
tar.extractall(path=get_full_data_path("scb_1m_th-en_newmm"))
if not os.path.exists(get_full_data_path("scb_1m_th-en_spm")):
def __init__(self):
    """
    Transliteration of Thai words (currently Thai to Latin, i.e.
    romanization).

    Loads the saved checkpoint located through
    ``get_corpus_path(_MODEL_NAME)``, restores the character lookup
    tables stored in it and builds the encoder from the stored
    encoder parameters.
    """
    # Locate the checkpoint file and deserialize it onto `device`.
    self.__model_filename = get_corpus_path(_MODEL_NAME)
    checkpoint = torch.load(self.__model_filename, map_location=device)

    # Both parameter entries are unpacked into exactly four values each.
    (
        enc_input_dim,
        enc_emb_dim,
        enc_hid_dim,
        enc_dropout,
    ) = checkpoint["encoder_params"]
    (
        dec_output_dim,
        dec_emb_dim,
        dec_hid_dim,
        dec_dropout,
    ) = checkpoint["decoder_params"]

    self._maxlength = 100

    # Lookup tables between characters and indices, for source and target.
    self._char_to_ix = checkpoint["char_to_ix"]
    self._ix_to_char = checkpoint["ix_to_char"]
    self._target_char_to_ix = checkpoint["target_char_to_ix"]
    self._ix_to_target_char = checkpoint["ix_to_target_char"]

    # Rebuild the encoder from the hyper-parameters saved in the checkpoint.
    self._encoder = Encoder(
        enc_input_dim, enc_emb_dim, enc_hid_dim, enc_dropout
    )
def get_model() -> Word2VecKeyedVectors:
    """
    Load the word vector model.

    The file located through ``get_corpus_path(_MODEL_NAME)`` is read
    as a binary word2vec-format file.

    :return: `gensim` word2vec model
    :rtype: gensim.models.keyedvectors.Word2VecKeyedVectors
    """
    return KeyedVectors.load_word2vec_format(
        get_corpus_path(_MODEL_NAME),
        binary=True,
    )
def _download() -> str:
    """
    Return the path of the ``thai2fit_wv`` corpus.

    When ``get_corpus_path`` yields a falsy result, the corpus is
    downloaded first and the path is looked up again.

    :return: the value returned by ``get_corpus_path`` for the corpus
    :rtype: str
    """
    corpus_name = "thai2fit_wv"

    location = get_corpus_path(corpus_name)
    if location:
        return location

    # No usable path yet: fetch the corpus, then resolve the path again.
    download_data(corpus_name)
    return get_corpus_path(corpus_name)