How to use the pythainlp.corpus.get_corpus_path function in pythainlp

To help you get started, we’ve selected a few examples of pythainlp.corpus.get_corpus_path, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PyThaiNLP / pythainlp / pythainlp / translate / core.py View on Github external
def download_model() -> None:
    """
    Download Model

    For each of the three translation corpora below, check
    ``get_corpus_path``; when it returns ``None`` the corpus is downloaded
    (forced, version "1.0") and the resulting gzip-compressed tar archive
    is unpacked. ``extractall()`` is called without a target, so the
    members land in the current working directory.
    """
    corpus_names = (
        "scb_1m_th-en_newmm",
        "scb_1m_th-en_spm",
        "scb_1m_en-th_moses",
    )
    for corpus_name in corpus_names:
        if get_corpus_path(corpus_name) is None:
            download(corpus_name, force=True, version="1.0")
            # The context manager closes the archive even if extraction
            # fails part-way, which the manual open()/close() pair did not.
            # NOTE(review): extractall() trusts member paths inside the
            # archive; confirm the download source is trusted.
            with tarfile.open(get_corpus_path(corpus_name), "r:gz") as tar:
                tar.extractall()

    print("Install model...")
github PyThaiNLP / pythainlp / pythainlp / translate / core.py View on Github external
tar = tarfile.open(get_corpus_path("scb_1m_en-th_moses"), "r:gz")
        tar.extractall()
        tar.close()

    print("Install model...")
    if not os.path.exists(get_full_data_path("scb_1m_th-en_newmm")):
        os.mkdir(get_full_data_path("scb_1m_th-en_newmm"))
        with tarfile.open(get_corpus_path("scb_1m_th-en_newmm")) as tar:
            tar.extractall(path=get_full_data_path("scb_1m_th-en_newmm"))
    if not os.path.exists(get_full_data_path("scb_1m_th-en_spm")):
        os.mkdir(get_full_data_path("scb_1m_th-en_spm"))
        with tarfile.open(get_corpus_path("scb_1m_th-en_spm")) as tar:
            tar.extractall(path=get_full_data_path("scb_1m_th-en_spm"))
    if not os.path.exists(get_full_data_path("scb_1m_en-th_moses")):
        os.mkdir(get_full_data_path("scb_1m_en-th_moses"))
        with tarfile.open(get_corpus_path("scb_1m_en-th_moses")) as tar:
            tar.extractall(path=get_full_data_path("scb_1m_en-th_moses"))
github PyThaiNLP / pythainlp / pythainlp / transliterate / thaig2p.py View on Github external
def __init__(self):
        """
        Transliteration of Thai words.

        Now supports Thai to Latin (romanization)

        Resolves the model file with ``get_corpus_path(_MODEL_NAME)``, loads
        it with ``torch.load``, stores the four vocabulary lookup tables
        found in the loaded object on the instance, and builds the encoder
        from the stored encoder parameters.
        """
        # get the model, will download if it's not available locally
        # NOTE(review): the download-on-miss behaviour is a property of
        # get_corpus_path that is not visible here — confirm.
        self.__model_filename = get_corpus_path(_MODEL_NAME)

        # NOTE(review): torch.load deserializes the file's contents; this
        # presumes the corpus file comes from a trusted source — confirm.
        # Tensors are mapped onto the module-level `device`.
        loader = torch.load(self.__model_filename, map_location=device)

        # Each parameter entry is unpacked into exactly four values.
        INPUT_DIM, E_EMB_DIM, E_HID_DIM, E_DROPOUT = loader["encoder_params"]
        # Decoder parameters are unpacked here but not consumed by the lines
        # shown; presumably used when the decoder is built — TODO confirm.
        OUTPUT_DIM, D_EMB_DIM, D_HID_DIM, D_DROPOUT = loader["decoder_params"]

        # Presumably an upper bound on sequence length — TODO confirm usage.
        self._maxlength = 100

        # Lookup tables taken verbatim from the loaded object; judging by the
        # key names they map characters to indices and back for the source
        # and target sides — verify against the code that wrote the file.
        self._char_to_ix = loader["char_to_ix"]
        self._ix_to_char = loader["ix_to_char"]
        self._target_char_to_ix = loader["target_char_to_ix"]
        self._ix_to_target_char = loader["ix_to_target_char"]

        # encoder/ decoder
        # Restore the model and construct the encoder and decoder.
        self._encoder = Encoder(INPUT_DIM, E_EMB_DIM, E_HID_DIM, E_DROPOUT)
github PyThaiNLP / pythainlp / pythainlp / translate / core.py View on Github external
def download_model()->None:
    """
    Download Model
    """
    if get_corpus_path("scb_1m_th-en_newmm") is None:
        download("scb_1m_th-en_newmm", force=True, version="1.0")
        tar = tarfile.open(get_corpus_path("scb_1m_th-en_newmm"), "r:gz")
        tar.extractall()
        tar.close()
    if get_corpus_path("scb_1m_th-en_spm") is None:
        download("scb_1m_th-en_spm", force=True, version="1.0")
        tar = tarfile.open(get_corpus_path("scb_1m_th-en_spm"), "r:gz")
        tar.extractall()
        tar.close()
    if get_corpus_path("scb_1m_en-th_moses") is None:
        download("scb_1m_en-th_moses", force=True, version="1.0")
        tar = tarfile.open(get_corpus_path("scb_1m_en-th_moses"), "r:gz")
        tar.extractall()
        tar.close()

    print("Install model...")
    if not os.path.exists(get_full_data_path("scb_1m_th-en_newmm")):
        os.mkdir(get_full_data_path("scb_1m_th-en_newmm"))
        with tarfile.open(get_corpus_path("scb_1m_th-en_newmm")) as tar:
            tar.extractall(path=get_full_data_path("scb_1m_th-en_newmm"))
    if not os.path.exists(get_full_data_path("scb_1m_th-en_spm")):
        os.mkdir(get_full_data_path("scb_1m_th-en_spm"))
        with tarfile.open(get_corpus_path("scb_1m_th-en_spm")) as tar:
github PyThaiNLP / pythainlp / pythainlp / translate / core.py View on Github external
def download_model()->None:
    """
    Download Model
    """
    if get_corpus_path("scb_1m_th-en_newmm") is None:
        download("scb_1m_th-en_newmm", force=True, version="1.0")
        tar = tarfile.open(get_corpus_path("scb_1m_th-en_newmm"), "r:gz")
        tar.extractall()
        tar.close()
    if get_corpus_path("scb_1m_th-en_spm") is None:
        download("scb_1m_th-en_spm", force=True, version="1.0")
        tar = tarfile.open(get_corpus_path("scb_1m_th-en_spm"), "r:gz")
        tar.extractall()
        tar.close()
    if get_corpus_path("scb_1m_en-th_moses") is None:
        download("scb_1m_en-th_moses", force=True, version="1.0")
        tar = tarfile.open(get_corpus_path("scb_1m_en-th_moses"), "r:gz")
        tar.extractall()
        tar.close()

    print("Install model...")
    if not os.path.exists(get_full_data_path("scb_1m_th-en_newmm")):
        os.mkdir(get_full_data_path("scb_1m_th-en_newmm"))
        with tarfile.open(get_corpus_path("scb_1m_th-en_newmm")) as tar:
            tar.extractall(path=get_full_data_path("scb_1m_th-en_newmm"))
    if not os.path.exists(get_full_data_path("scb_1m_th-en_spm")):
github PyThaiNLP / pythainlp / pythainlp / transliterate / thai2rom.py View on Github external
def __init__(self):
        """
        Transliteration of Thai words.

        Now supports Thai to Latin (romanization)

        The model file located by ``get_corpus_path(_MODEL_NAME)`` is read
        with ``torch.load``; its vocabulary lookup tables are kept on the
        instance and its encoder parameters are used to construct the
        encoder.
        """
        # get the model, will download if it's not available locally
        # NOTE(review): whether get_corpus_path itself triggers a download
        # cannot be seen from this method — confirm.
        self.__model_filename = get_corpus_path(_MODEL_NAME)

        # Loaded onto the module-level `device`.
        # NOTE(review): torch.load deserializes arbitrary saved objects;
        # assumes the model file is trusted — confirm.
        loader = torch.load(self.__model_filename, map_location=device)

        # "encoder_params" and "decoder_params" must each hold four items,
        # otherwise the unpacking below raises.
        INPUT_DIM, E_EMB_DIM, E_HID_DIM, E_DROPOUT = loader["encoder_params"]
        # Not referenced again in the lines shown; presumably needed for the
        # decoder construction — TODO confirm.
        OUTPUT_DIM, D_EMB_DIM, D_HID_DIM, D_DROPOUT = loader["decoder_params"]

        # Fixed at 100; presumably a maximum sequence length — TODO confirm.
        self._maxlength = 100

        # Source-side and target-side lookup tables as stored in the file
        # (meaning inferred from the key names — verify).
        self._char_to_ix = loader["char_to_ix"]
        self._ix_to_char = loader["ix_to_char"]
        self._target_char_to_ix = loader["target_char_to_ix"]
        self._ix_to_target_char = loader["ix_to_target_char"]

        # encoder/ decoder
        # Restore the model and construct the encoder and decoder.
        self._encoder = Encoder(INPUT_DIM, E_EMB_DIM, E_HID_DIM, E_DROPOUT)
github PyThaiNLP / pythainlp / pythainlp / word_vector / core.py View on Github external
def get_model() -> Word2VecKeyedVectors:
    """
    Load the word vector model.

    The file is located with ``get_corpus_path(_MODEL_NAME)`` and read
    through ``KeyedVectors.load_word2vec_format`` in binary mode.

    :return: `gensim` word2vec model
    :rtype: gensim.models.keyedvectors.Word2VecKeyedVectors
    """
    model_file = get_corpus_path(_MODEL_NAME)
    vectors = KeyedVectors.load_word2vec_format(model_file, binary=True)
    return vectors
github PyThaiNLP / pythainlp / pythainlp / word_vector / __init__.py View on Github external
def _download() -> str:
    """
    Return the path of the "thai2fit_wv" corpus.

    When ``get_corpus_path`` yields a falsy value, ``download_data`` is
    called for the corpus and the path is looked up a second time; the
    result of that second lookup is returned as-is.
    """
    corpus_name = "thai2fit_wv"
    location = get_corpus_path(corpus_name)
    if location:
        return location
    download_data(corpus_name)
    return get_corpus_path(corpus_name)