How to use the danlp.download.download_model function in danlp

To help you get started, we’ve selected a few danlp examples based on popular ways the function is used in public projects.
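
Across all of these examples the calling pattern is the same: pass a registered model identifier, optionally a cache directory, and a process_func that unpacks the downloaded archive; download_model returns the local path of the (possibly already cached) model. A minimal sketch, assuming the same imports the tests below rely on:

from danlp.download import download_model, DEFAULT_CACHE_DIR, _unzip_process_func

# Download the Danish flair NER model, or reuse a cached copy,
# and unzip it into the cache directory
model_path = download_model('flair.ner', DEFAULT_CACHE_DIR,
                            process_func=_unzip_process_func,
                            verbose=True)
print(model_path)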

github alexandrainst / danlp / tests / test_flair_models.py
def test_flair_tagger(self):
        # Download model beforehand
        download_model('flair.ner', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
        print("Downloaded the flair model")

        # Load the NER tagger using the DaNLP wrapper
        flair_model = load_flair_ner_model()

# Using the flair NER tagger
        sentence = Sentence('jeg hopper pΓ₯ en bil som er rΓΈd sammen med Jens-Peter E. Hansen')
        flair_model.predict(sentence)

        # to_tagged_string() inserts the predicted NER tag after each tagged token
        expected_string = "jeg hopper pΓ₯ en bil som er rΓΈd sammen med Jens-Peter <B-PER> E. <I-PER> Hansen <I-PER>"

        self.assertEqual(sentence.to_tagged_string(), expected_string)
github alexandrainst / danlp / tests / test_download.py
def test_download_fails_with_wrong_title(self):
        with self.assertRaises(ValueError):
            download_model('do.not.exists.wv')

        with self.assertRaises(ValueError):
            download_dataset('do.not.exists.zip')
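
As the test shows, both download_model and download_dataset reject identifiers that are not registered with danlp, raising a ValueError. A small sketch of guarding against a misspelled model name:

from danlp.download import download_model, _unzip_process_func

try:
    # 'flair.nerr' is a deliberate typo and not a registered model
    path = download_model('flair.nerr', process_func=_unzip_process_func)
except ValueError as error:
    print('Unknown danlp model identifier:', error)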
github alexandrainst / danlp / tests / test_embeddings.py
def test_fasttext_embeddings(self):
        # First we will add smaller test embeddings to the registry of available models
        MODELS['ddt.swv'] = {
            'url': 'https://danlp.s3.eu-central-1.amazonaws.com/test-models/ddt.swv.zip',
            'vocab_size': 5000,
            'dimensions': 100,
            'md5_checksum': 'c50c61e1b434908e2732c80660abf8bf',
            'size': 741125088,
            'file_extension': '.bin'
        }

        AVAILABLE_SUBWORD_EMBEDDINGS.append('ddt.swv')

        download_model('ddt.swv', process_func=_unzip_process_func)

        fasttext_embeddings = load_wv_with_gensim('ddt.swv')

        self.assertEqual(type(fasttext_embeddings), FastTextKeyedVectors)

        # The word is not in the vocab
        self.assertNotIn('institutmedarbejdskontrakt', fasttext_embeddings.vocab)

        # However we can get an embedding because of subword units
        self.assertEqual(fasttext_embeddings['institutmedarbejdskontrakt'].size, 100)
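
The test above works because download_model looks the identifier up in the MODELS registry, which supplies the download URL, the md5 checksum and size (presumably used for verification), and the archive's file extension. A sketch of registering and fetching a custom embedding; the identifier, URL, checksum and size below are placeholders, not a real danlp artifact:

from danlp.download import MODELS, download_model, _unzip_process_func

MODELS['my.custom.swv'] = {
    'url': 'https://example.com/my.custom.swv.zip',  # placeholder URL
    'vocab_size': 5000,
    'dimensions': 100,
    'md5_checksum': '<md5 of the zip>',              # placeholder checksum
    'size': 123456789,                               # placeholder size in bytes
    'file_extension': '.bin'
}

path = download_model('my.custom.swv', process_func=_unzip_process_func)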
github alexandrainst / danlp / danlp / models / bert_models.py
def __init__(self, cache_dir=DEFAULT_CACHE_DIR, verbose=False):
        from transformers import BertTokenizer, BertForSequenceClassification

        # download the model or load the model path
        path_emotion = download_model('bert.emotion', cache_dir,
                                       process_func=_unzip_process_func,
                                       verbose=verbose)
        path_emotion = os.path.join(path_emotion,'bert.emotion')
        path_reject = download_model('bert.noemotion', cache_dir,
                                       process_func=_unzip_process_func,
                                       verbose=verbose)
        path_reject = os.path.join(path_reject,'bert.noemotion')
        # load the models
        self.tokenizer_rejct = BertTokenizer.from_pretrained(path_reject)
        self.model_reject = BertForSequenceClassification.from_pretrained(path_reject)
        
        self.tokenizer = BertTokenizer.from_pretrained(path_emotion)
        self.model = BertForSequenceClassification.from_pretrained(path_emotion)
        
        # load the class names mapping
        self.catagories = {5: 'Foragt/Modvilje', 2: 'Forventning/Interrese',
                           0: 'GlΓ¦de/Sindsro', 3: 'Overasket/MΓ₯llΓΈs',
                           1: 'Tillid/Accept',
                           4: 'Vrede/Irritation', 6: 'Sorg/trist',
                           7: 'Frygt/Bekymret'}
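
danlp pairs wrapper classes like the one above with loader helpers (the first test uses load_flair_ner_model). Assuming the analogous load_bert_emotion_model helper, instantiating the classifier is what triggers the two download_model calls in __init__, and a second instantiation reuses the cached archives:

from danlp.models import load_bert_emotion_model

# Instantiation downloads (or reuses) 'bert.emotion' and 'bert.noemotion'
classifier = load_bert_emotion_model()

# The class-id-to-emotion mapping defined in __init__ above
print(classifier.catagories)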
github alexandrainst / danlp / danlp / models / bert_models.py
def __init__(self, cache_dir=DEFAULT_CACHE_DIR, verbose=False):
        from transformers import AutoModelForTokenClassification
        from transformers import AutoTokenizer

        # download the model or load the model path
        weights_path = download_model('bert.ner', cache_dir,
                                      process_func=_unzip_process_func,
                                      verbose=verbose)

        self.label_list = ["O", "B-MISC", "I-MISC", "B-PER", "I-PER", "B-ORG",
                           "I-ORG", "B-LOC", "I-LOC"]

        self.model = AutoModelForTokenClassification.from_pretrained(weights_path)
        self.tokenizer = AutoTokenizer.from_pretrained(weights_path)
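
The same pattern applies to the NER wrapper: constructing it runs the download_model('bert.ner', ...) call above and then loads the weights from the returned path. The loader name and the predict return value below are assumptions, made by analogy with the flair loader used in the first test:

from danlp.models import load_bert_ner_model

ner = load_bert_ner_model()

# Assumed return value: a list of tokens and a parallel list of BIO tags
tokens, labels = ner.predict('Jens-Peter E. Hansen bor i KΓΈbenhavn')
print(list(zip(tokens, labels)))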
github alexandrainst / danlp / danlp / models / embeddings.py
    :param direction: 'bi', 'fwd' or 'bwd'
    :param word_embeddings: if True, include the Danish fastText word embeddings
    :param cache_dir:
    :param verbose:
    """
    from flair.embeddings import FlairEmbeddings
    from flair.embeddings import WordEmbeddings
    from flair.embeddings import StackedEmbeddings

    embeddings = []

    if word_embeddings:
        fasttext_embedding = WordEmbeddings('da')
        embeddings.append(fasttext_embedding)

    if direction == 'bi' or direction == 'fwd':
        fwd_weight_path = download_model('flair.fwd', cache_dir,
                                         verbose=verbose,
                                         process_func=_unzip_process_func)
        embeddings.append(FlairEmbeddings(fwd_weight_path))

    if direction == 'bi' or direction == 'bwd':
        bwd_weight_path = download_model('flair.bwd', cache_dir,
                                         verbose=verbose,
                                         process_func=_unzip_process_func)
        embeddings.append(FlairEmbeddings(bwd_weight_path))

    if len(embeddings) == 1:
        return embeddings[0]

    return StackedEmbeddings(embeddings=embeddings)
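
This excerpt appears to be the body of danlp's context-embedding loader (load_context_embeddings_with_flair in the same module; the name is an assumption, since the signature is not shown). The forward and backward flair models are only downloaded for the directions you request, and with word_embeddings=True the result is a StackedEmbeddings that also includes the Danish fastText vectors:

from flair.data import Sentence
from danlp.models.embeddings import load_context_embeddings_with_flair

# Downloads 'flair.fwd' and 'flair.bwd' on first use, then stacks them
# with the Danish word embeddings
stacked = load_context_embeddings_with_flair(direction='bi', word_embeddings=True)

sentence = Sentence('jeg hopper pΓ₯ en bil som er rΓΈd')
stacked.embed(sentence)
print(sentence[0].embedding.shape)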
github alexandrainst / danlp / danlp / models / embeddings.py
    spacy_model_dir = os.path.join(cache_dir, pretrained_embedding + ".spacy")

    if os.path.isdir(spacy_model_dir):
        # Return spaCy model if spaCy model dir exists
        return spacy.load(spacy_model_dir)

    bin_file_path = os.path.join(cache_dir, pretrained_embedding + ".bin")

    if os.path.isfile(bin_file_path):
        # Then we do not need to download the model
        model_info = MODELS[pretrained_embedding]
        model_info['name'] = pretrained_embedding
        _process_embeddings_for_spacy(bin_file_path[:-4] + ".tmp", model_info)
    else:
        download_model(pretrained_embedding, cache_dir,
                       _process_embeddings_for_spacy, verbose=True,
                       file_extension='.spacy')

    return spacy.load(spacy_model_dir)
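
This last excerpt shows the caching logic around download_model: if a converted .spacy model directory already exists it is loaded directly; otherwise the vectors are downloaded (or converted from an existing .bin file) with _process_embeddings_for_spacy before spacy.load is called. A usage sketch, assuming danlp's load_wv_with_spacy helper and the 'wiki.da.wv' embedding identifier (neither appears in this excerpt):

from danlp.models.embeddings import load_wv_with_spacy

# First call downloads and converts the vectors; later calls hit the
# os.path.isdir(spacy_model_dir) fast path above
nlp = load_wv_with_spacy('wiki.da.wv')

doc = nlp('jeg hopper pΓ₯ en bil som er rΓΈd')
print(doc[0].vector[:5])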