How to use danlp - 10 common examples

To help you get started, we’ve selected a few danlp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alexandrainst / danlp / tests / test_ner_tagger.py View on Github external
def test_flair_tagger(self):
    """Check the tagged output of the DaNLP flair NER model on one sentence.

    Fetches the ``flair.ner`` model with ``download_model`` (passing
    ``DEFAULT_CACHE_DIR``), loads it through ``load_flair_ner_model``, runs
    prediction on a Danish sentence and compares ``to_tagged_string()``
    with a fixed expected string.
    """
    # Download model beforehand
    download_model('flair.ner', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
    print("Downloaded the flair model")

    # Load the NER tagger using the DaNLP wrapper
    flair_model = load_flair_ner_model()

    # Using the flair NER tagger. The Danish letters were mis-decoded
    # (UTF-8 read as ISO-8859-7) in the previous literal and are repaired here.
    sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
    flair_model.predict(sentence)

    # NOTE(review): the tag markers were missing from this literal (only
    # doubled spaces and a trailing space remained, presumably stripped as
    # HTML). They are restored as BIO person tags on the three name tokens;
    # confirm against the model's actual output.
    expected_string = "jeg hopper på en bil som er rød sammen med Jens-Peter <B-PER> E. <I-PER> Hansen <I-PER>"

    self.assertEqual(sentence.to_tagged_string(), expected_string)
github alexandrainst / danlp / tests / test_flair_models.py View on Github external
def test_flair_tagger(self):
    """Verify that the flair NER model marks the person name in a sentence.

    The ``flair.ner`` model is downloaded via ``download_model`` and loaded
    with ``load_flair_ner_model``; the tagged string produced for a single
    Danish sentence must equal the expected string below.
    """
    # Download model beforehand
    download_model('flair.ner', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
    print("Downloaded the flair model")

    # Load the NER tagger using the DaNLP wrapper
    flair_model = load_flair_ner_model()

    # Using the flair NER tagger. The literal previously contained mojibake
    # for the Danish letters (UTF-8 decoded as ISO-8859-7); fixed here.
    sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
    flair_model.predict(sentence)

    # NOTE(review): the expected literal had lost its tag markers (doubled
    # spaces and a trailing space were left behind). BIO person tags are
    # reinstated on the name tokens; verify against real model output.
    expected_string = "jeg hopper på en bil som er rød sammen med Jens-Peter <B-PER> E. <I-PER> Hansen <I-PER>"

    self.assertEqual(sentence.to_tagged_string(), expected_string)
github alexandrainst / danlp / tests / test_embeddings.py View on Github external
'vocab_size': 5000,
            'dimensions': 300,
            'md5_checksum': 'fcaa981a613b325ae4dc61aba235aa82',
            'size': 5594508,
            'file_extension': '.bin'
        }

        AVAILABLE_EMBEDDINGS.append('wiki.da.small.wv')

        self.embeddings_for_testing = [
            'wiki.da.small.wv',
            'dslreddit.da.wv'
        ]
        # Lets download the models and unzip it
        for emb in self.embeddings_for_testing:
            download_model(emb, process_func=_unzip_process_func)
github alexandrainst / danlp / tests / test_download.py View on Github external
def test_download_fails_with_wrong_title(self):
    """Unknown model and dataset names must be rejected with ValueError."""
    # Callable form of assertRaises: the downloader is invoked with the
    # bogus name and the test fails unless ValueError is raised.
    self.assertRaises(ValueError, download_model, 'do.not.exists.wv')
    self.assertRaises(ValueError, download_dataset, 'do.not.exists.zip')
github alexandrainst / danlp / tests / test_embeddings.py View on Github external
def test_fasttext_embeddings(self):
    """Load a small subword embedding and check out-of-vocabulary lookup.

    Registers a test entry under ``'ddt.swv'`` in ``MODELS`` and
    ``AVAILABLE_SUBWORD_EMBEDDINGS``, downloads it, loads it with
    ``load_wv_with_gensim`` and asserts that a word absent from the
    vocabulary still yields a 100-element vector.
    """
    model_name = 'ddt.swv'
    oov_word = 'institutmedarbejdskontrakt'

    # First we will add smaller test embeddings to the MODELS registry
    test_entry = {
        'url': 'https://danlp.s3.eu-central-1.amazonaws.com/test-models/ddt.swv.zip',
        'vocab_size': 5000,
        'dimensions': 100,
        'md5_checksum': 'c50c61e1b434908e2732c80660abf8bf',
        'size': 741125088,
        'file_extension': '.bin',
    }
    MODELS[model_name] = test_entry
    AVAILABLE_SUBWORD_EMBEDDINGS.append(model_name)

    download_model(model_name, process_func=_unzip_process_func)
    vectors = load_wv_with_gensim(model_name)

    self.assertEqual(type(vectors), FastTextKeyedVectors)

    # The word is not in the vocab ...
    self.assertNotIn(oov_word, vectors.vocab)

    # ... however we can still get an embedding because of subword units
    oov_vector = vectors[oov_word]
    self.assertEqual(oov_vector.size, 100)
github alexandrainst / danlp / tests / test_flair_models.py View on Github external
def test_flair_tagger(self):
    """Check the tagged output of the DaNLP flair POS model on one sentence.

    Fetches the ``flair.pos`` model with ``download_model`` (passing
    ``DEFAULT_CACHE_DIR``), loads it through ``load_flair_pos_model``, runs
    prediction on a Danish sentence and compares ``to_tagged_string()``
    with a fixed expected string.
    """
    # Download model beforehand
    download_model('flair.pos', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
    print("Downloaded the flair model")

    # Load the POS tagger using the DaNLP wrapper
    flair_model = load_flair_pos_model()

    # Using the flair POS tagger. The Danish letters were mis-decoded
    # (UTF-8 read as ISO-8859-7) in the previous literal and are repaired here.
    sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
    flair_model.predict(sentence)

    # NOTE(review): every token below is followed by a doubled space, which
    # suggests the per-token tag markers were stripped from this literal
    # (presumably as HTML). Only the character encoding is repaired here;
    # the tags must be restored from the model's actual output before this
    # assertion can be expected to pass.
    expected_string = "jeg  hopper  på  en  bil  som  er " \
                      " rød  sammen  med  Jens-Peter  E.  Hansen "

    self.assertEqual(sentence.to_tagged_string(), expected_string)
github alexandrainst / danlp / tests / test_spacy_model.py View on Github external
def test_download(self):
    """Download the spaCy model and check its reported pipeline and language."""
    # Fetch the model first; the returned path is what spacy.info inspects.
    spacy_model_dir = download_model(
        'spacy',
        DEFAULT_CACHE_DIR,
        process_func=_unzip_process_func,
        verbose=True,
    )

    model_meta = spacy.info(spacy_model_dir)

    expected_pipeline = ['tagger', 'parser', 'ner']
    self.assertListEqual(model_meta['pipeline'], expected_pipeline)
    self.assertEqual(model_meta['lang'], 'da')
github alexandrainst / danlp / tests / test_spacy_model.py View on Github external
def test_download(self):
    """The downloaded spaCy model must be Danish with tagger, parser and ner."""
    download_kwargs = {'process_func': _unzip_process_func, 'verbose': True}

    # Download model beforehand and keep the path it returns
    path_to_model = download_model('spacy', DEFAULT_CACHE_DIR, **download_kwargs)

    metadata = spacy.info(path_to_model)

    pipeline = metadata['pipeline']
    self.assertListEqual(pipeline, ['tagger', 'parser', 'ner'])

    language = metadata['lang']
    self.assertEqual(language, 'da')
github alexandrainst / danlp / tests / test_download.py View on Github external
def test_download_fails_with_wrong_title(self):
    """Both downloaders raise ValueError when given a name that does not exist."""
    bogus_requests = (
        (download_model, 'do.not.exists.wv'),
        (download_dataset, 'do.not.exists.zip'),
    )
    # Same order as before: the model downloader is checked first.
    for downloader, unknown_name in bogus_requests:
        with self.assertRaises(ValueError):
            downloader(unknown_name)
github alexandrainst / danlp / tests / test_datasets.py View on Github external
def test_europarlsentiment1(self):
    """EuroparlSentiment1 loaded through pandas must contain 184 rows."""
    frame = EuroparlSentiment1().load_with_pandas()
    row_count = len(frame)
    self.assertEqual(row_count, 184)