How to use the danlp.models.embeddings.load_wv_with_gensim function in danlp

To help you get started, we’ve selected a few danlp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: alexandrainst/danlp, tests/test_embeddings.py (view on GitHub)
def test_embeddings_with_gensim(self):
    """Check each test embedding's vocabulary size against ``MODELS``.

    Every name in ``self.embeddings_for_testing`` is loaded with
    ``load_wv_with_gensim`` and the number of entries in its ``vocab``
    is compared with the ``'vocab_size'`` recorded for it in ``MODELS``.
    """
    for model_name in self.embeddings_for_testing:
        word_vectors = load_wv_with_gensim(model_name)
        expected_vocab_size = MODELS[model_name]['vocab_size']
        self.assertEqual(expected_vocab_size, len(word_vectors.vocab))
Source: alexandrainst/danlp, tests/test_embeddings.py (view on GitHub)
def test_fasttext_embeddings(self):
    """Check that the 'ddt.swv' subword embeddings load and handle OOV words.

    The test registers metadata for the 'ddt.swv' test model in ``MODELS``,
    adds its name to ``AVAILABLE_SUBWORD_EMBEDDINGS``, downloads it, and
    loads it with ``load_wv_with_gensim``. It then asserts that the loaded
    object is a ``FastTextKeyedVectors`` and that a word missing from the
    vocabulary still yields a vector with 100 elements.
    """
    # First we register a smaller set of test embeddings in MODELS under
    # the name 'ddt.swv', which is the name used for download and load below.
    MODELS['ddt.swv'] = {
        'url': 'https://danlp.s3.eu-central-1.amazonaws.com/test-models/ddt.swv.zip',
        'vocab_size': 5000,
        'dimensions': 100,
        'md5_checksum': 'c50c61e1b434908e2732c80660abf8bf',
        'size': 741125088,
        'file_extension': '.bin'
    }

    # NOTE(review): MODELS and AVAILABLE_SUBWORD_EMBEDDINGS are mutated here
    # and not restored afterwards -- confirm no other test relies on a
    # pristine registry.
    AVAILABLE_SUBWORD_EMBEDDINGS.append('ddt.swv')

    download_model('ddt.swv', process_func=_unzip_process_func)

    fasttext_embeddings = load_wv_with_gensim('ddt.swv')

    # assertIsInstance is the idiomatic unittest type check and gives a
    # clearer failure message than comparing type objects for equality.
    self.assertIsInstance(fasttext_embeddings, FastTextKeyedVectors)

    # The word is not in the vocab
    self.assertNotIn('institutmedarbejdskontrakt', fasttext_embeddings.vocab)

    # However we can get an embedding because of subword units
    self.assertEqual(fasttext_embeddings['institutmedarbejdskontrakt'].size, 100)
Source: alexandrainst/danlp, examples/benchmarks/wordembeddings_benchmarks.py (view on GitHub)
def load_wv_models():
    """Lazily yield ``(name, vectors)`` pairs for every available embedding.

    Iterates ``AVAILABLE_EMBEDDINGS`` in order and loads each entry with
    ``load_wv_with_gensim`` only when the consumer asks for the next item.
    """
    yield from (
        (model_name, load_wv_with_gensim(model_name))
        for model_name in AVAILABLE_EMBEDDINGS
    )