How to use the danlp.download._unzip_process_func function in danlp

To help you get started, we’ve selected a few danlp examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alexandrainst / danlp / tests / test_embeddings.py View on Github external
'vocab_size': 5000,
            'dimensions': 300,
            'md5_checksum': 'fcaa981a613b325ae4dc61aba235aa82',
            'size': 5594508,
            'file_extension': '.bin'
        }

        AVAILABLE_EMBEDDINGS.append('wiki.da.small.wv')

        self.embeddings_for_testing = [
            'wiki.da.small.wv',
            'dslreddit.da.wv'
        ]
        # Lets download the models and unzip it
        for emb in self.embeddings_for_testing:
            download_model(emb, process_func=_unzip_process_func)
github alexandrainst / danlp / tests / test_spacy_model.py View on Github external
def test_download(self):
        """Download the Danish spaCy model and sanity-check its metadata."""
        # Fetch (and unzip) the model into the default cache before inspecting it.
        path = download_model('spacy', DEFAULT_CACHE_DIR,
                              verbose=True,
                              process_func=_unzip_process_func)

        meta = spacy.info(path)
        self.assertEqual(meta['lang'], 'da')
        self.assertListEqual(meta['pipeline'], ['tagger', 'parser', 'ner'])
github alexandrainst / danlp / danlp / models / embeddings.py View on Github external
binary=False,
                                                      encoding='utf8')

        assert_wv_dimensions(word_vecs, pretrained_embedding)

        word_vecs.save_word2vec_format(bin_file_path, binary=True)

        # Clean up the files
        os.remove(org_vec_file)
        os.remove(new_vec_file)

    elif pretrained_embedding == 'dslreddit.da.wv':
        _process_dslreddit(tmp_file_path, cache_dir)

    elif pretrained_embedding == 'wiki.da.swv':
        _unzip_process_func(tmp_file_path, clean_up_raw_data, verbose,
                            file_in_zip='wiki.da.bin')

    elif pretrained_embedding == 'cc.da.swv':
        import gzip
        import shutil

        bin_file_path = os.path.join(cache_dir, pretrained_embedding + ".bin")
        if verbose:
            print(
                "Decompressing raw {} embeddings".format(pretrained_embedding))

        with gzip.open(tmp_file_path, 'rb') as fin, open(bin_file_path,
                                                         'wb') as fout:
            shutil.copyfileobj(fin, fout)

    elif pretrained_embedding == 'sketchengine.da.swv':
github alexandrainst / danlp / danlp / models / bert_models.py View on Github external
def __init__(self, cache_dir=DEFAULT_CACHE_DIR, verbose=False):
        """Load the BERT subjectivity and polarity models (downloading on first use).

        :param cache_dir: directory where the models are cached/downloaded
        :param verbose: forwarded to ``download_model``
        """
        from transformers import BertTokenizer, BertForSequenceClassification

        # Resolve the two model directories, unzipping the archives if needed.
        subjective_dir = download_model('bert.subjective', cache_dir,
                                        process_func=_unzip_process_func,
                                        verbose=verbose)
        subjective_dir = os.path.join(subjective_dir, 'bert.sub.v0.0.1')
        polarity_dir = download_model('bert.polarity', cache_dir,
                                      process_func=_unzip_process_func,
                                      verbose=verbose)
        polarity_dir = os.path.join(polarity_dir, 'bert.pol.v0.0.1')

        # One tokenizer/classifier pair per task.
        self.tokenizer_sub = BertTokenizer.from_pretrained(subjective_dir)
        self.model_sub = BertForSequenceClassification.from_pretrained(subjective_dir)
        self.tokenizer_pol = BertTokenizer.from_pretrained(polarity_dir)
        self.model_pol = BertForSequenceClassification.from_pretrained(polarity_dir)
github alexandrainst / danlp / danlp / models / pos_taggers.py View on Github external
def load_flair_pos_model(cache_dir=DEFAULT_CACHE_DIR, verbose=False):
    """Load the Danish flair part-of-speech tagger.

    :param cache_dir: directory used to cache the downloaded model weights
    :param verbose: forwarded to ``download_model``
    :return: a flair ``SequenceTagger`` loaded from the downloaded weights
    """
    from flair.models import SequenceTagger

    # Resolve the weight file, downloading and unzipping on first use.
    weights = download_model('flair.pos', cache_dir,
                             process_func=_unzip_process_func,
                             verbose=verbose)
    return SequenceTagger.load(weights)
github alexandrainst / danlp / danlp / models / flair_models.py View on Github external
def load_flair_ner_model(cache_dir=DEFAULT_CACHE_DIR, verbose=False):
    """Load the Danish flair named-entity recognition tagger.

    :param cache_dir: directory used to cache the downloaded model weights
    :param verbose: forwarded to ``download_model``
    :return: a flair ``SequenceTagger`` loaded from the downloaded weights
    """
    from flair.models import SequenceTagger

    # Resolve the weight file, downloading and unzipping on first use.
    weights = download_model('flair.ner', cache_dir,
                             process_func=_unzip_process_func,
                             verbose=verbose)
    return SequenceTagger.load(weights)
github alexandrainst / danlp / danlp / models / flair_models.py View on Github external
def load_flair_pos_model(cache_dir=DEFAULT_CACHE_DIR, verbose=False):
    """Load the Danish flair part-of-speech tagger.

    :param cache_dir: directory used to cache the downloaded model weights
    :param verbose: forwarded to ``download_model``
    :return: a flair ``SequenceTagger`` loaded from the downloaded weights
    """
    from flair.models import SequenceTagger

    # Download (and unzip) the weights if they are not already cached,
    # then hand the resulting path to flair.
    weight_file = download_model('flair.pos', cache_dir,
                                 process_func=_unzip_process_func,
                                 verbose=verbose)
    tagger = SequenceTagger.load(weight_file)
    return tagger