How to use the gluonnlp.Vocab.from_json function in gluonnlp

To help you get started, we’ve selected a few gluonnlp examples that show how Vocab.from_json is used in popular public projects.

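Before diving into the project snippets, it helps to see the basic round trip that Vocab.from_json completes: to_json serializes a vocabulary to a JSON string, and from_json reconstructs an identical vocabulary from that string. Below is a minimal, self-contained sketch; the tokens and the vocab.json file name are illustrative.

import gluonnlp as nlp

# Build a small vocabulary from token counts.
counter = nlp.data.count_tokens(['hello', 'world', 'hello', 'nice', 'world'])
vocab = nlp.Vocab(counter)

# Serialize the vocabulary to a JSON string and persist it.
with open('vocab.json', 'w') as f:
    f.write(vocab.to_json())

# Later, restore an identical vocabulary from the saved string.
with open('vocab.json', 'r') as f:
    restored = nlp.Vocab.from_json(f.read())

assert restored['hello'] == vocab['hello']
assert len(restored) == len(vocab)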

github dmlc/gluon-nlp/scripts/question_answering/data_pipeline.py
        word_vocab : Vocab
            Word-level vocabulary
        char_vocab : Vocab
            Char-level vocabulary
        """
        with open(os.path.join(self._data_root_path, self._processed_train_data_file_name),
                  'r') as f:
            train_examples = json.load(f)

        with open(os.path.join(self._data_root_path, self._processed_dev_data_file_name), 'r') as f:
            dev_examples = json.load(f)

        with open(os.path.join(self._data_root_path, self._word_vocab_file_name), 'r') as f:
            word_vocab = Vocab.from_json(json.load(f))

        with open(os.path.join(self._data_root_path, self._char_vocab_file_name), 'r') as f:
            char_vocab = Vocab.from_json(json.load(f))

        return train_examples, dev_examples, word_vocab, char_vocab
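
Note that this loader passes json.load(f) to Vocab.from_json, which implies the vocabulary files store the to_json string itself as a JSON value. A hedged sketch of the matching save step under that assumption (the helper name and file names are illustrative, mirroring the attributes used above):

import json
import os

def save_vocabs(data_root_path, word_vocab, char_vocab,
                word_vocab_file_name='word_vocab.json',
                char_vocab_file_name='char_vocab.json'):
    """Illustrative counterpart to the loading code above."""
    with open(os.path.join(data_root_path, word_vocab_file_name), 'w') as f:
        # json.dump wraps the serialized vocabulary in a JSON string,
        # so the loader can call Vocab.from_json(json.load(f)).
        json.dump(word_vocab.to_json(), f)
    with open(os.path.join(data_root_path, char_vocab_file_name), 'w') as f:
        json.dump(char_vocab.to_json(), f)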

github dmlc/gluon-nlp/scripts/natural_language_inference/main.py
        else:
            vocab = build_vocab(train_dataset)
            with open(vocab_path, 'w') as fout:
                fout.write(vocab.to_json())
        glove = nlp.embedding.create(args.embedding, source=args.embedding_source)
        vocab.set_embedding(glove)

        train_data_loader = prepare_data_loader(args, train_dataset, vocab)
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)

        model = build_model(args, vocab)
        train_model(model, train_data_loader, val_data_loader, vocab.embedding, ctx, args)
    elif args.mode == 'test':
        model_args = argparse.Namespace(**json.load(
            open(os.path.join(args.model_dir, 'config.json'))))
        vocab = nlp.Vocab.from_json(
            open(os.path.join(args.model_dir, 'vocab.jsons')).read())
        val_dataset = read_dataset(args, 'test_file')
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)
        model = build_model(model_args, vocab)
        model.load_parameters(os.path.join(
            args.model_dir, 'checkpoints', 'valid_best.params'), ctx=ctx)
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        logger.info('Test on {}'.format(args.test_file))
        loss, acc = test_model(model, val_data_loader, loss_func, ctx)
        logger.info('loss={:.4f} acc={:.4f}'.format(loss, acc))
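
One thing worth noting in this script: the JSON produced by to_json carries only the token-to-index mapping and special tokens, not any word vectors attached with set_embedding. That is why the training branch calls vocab.set_embedding(glove) after building or restoring the vocabulary, while the test branch above can skip it, since the embedding weights come back with the model checkpoint. A small sketch of re-attaching an embedding after from_json, in case the vectors are needed outside a checkpoint (the path and embedding source are illustrative):

import gluonnlp as nlp

with open('vocab.jsons', 'r') as f:
    vocab = nlp.Vocab.from_json(f.read())

# from_json does not restore attached embeddings, so re-attach them if needed.
vocab.set_embedding(nlp.embedding.create('glove', source='glove.6B.300d'))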

github dmlc/gluon-nlp/scripts/language_model/conversion_utils/compare_transformerxl_pytorch_gluon_model.py
def compare_transformerxl(args, kwargs, corpus):
    # Data
    np_features, np_labels, batch_size, tgt_len = get_data(args)

    # Models
    model_p = transformers.TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
    model_p.crit.keep_order = True
    model_p.transformer.output_attentions = False  # no change of default; breaks model if changed
    model_p.transformer.output_hidden_states = True

    with open(args.gluon_vocab_file, 'r') as f:
        vocab = nlp.Vocab.from_json(f.read())
    ctx = mx.gpu()
    model = TransformerXL(vocab_size=len(vocab), clamp_len=model_p.transformer.clamp_len, **kwargs)
    model.initialize(ctx=ctx)
    model.load_parameters(args.gluon_parameter_file, ignore_extra=False)
    model.hybridize()

    # Computation
    assert len(np_features) == 2
    mems = model.begin_mems(batch_size, model_p.config.mem_len, context=ctx)
    mems_p = None
    for batch in range(2):
        print('Batch {}'.format(batch))

        features_nd = mx.nd.array(np_features[batch], ctx=ctx)
        labels_nd = mx.nd.array(np_labels[batch], ctx=ctx)
        features_p = torch.tensor(np_features[batch], dtype=torch.long)
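
Once restored with from_json, the vocabulary behaves like any other gluonnlp.Vocab: len(vocab) gives the size used for vocab_size above, and tokens can be mapped to indices and back. A short illustrative sketch (the file path and tokens are placeholders):

import gluonnlp as nlp

with open('vocab.json', 'r') as f:
    vocab = nlp.Vocab.from_json(f.read())

print(len(vocab))                          # vocabulary size, including special tokens
indices = vocab.to_indices(['the', 'quick', 'brown', 'fox'])
print(vocab.to_tokens(indices))            # inverse mapping back to tokens
print(vocab.unknown_token, vocab[vocab.unknown_token])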

github dmlc/gluon-nlp/scripts/word_embeddings/data.py
    def vocab(self):
        path = os.path.join(self._path, 'vocab.json')
        with io.open(path, 'r', encoding='utf-8') as in_file:
            return Vocab.from_json(in_file.read())
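
The property above lazily loads a cached vocabulary from vocab.json. The matching write step is not shown on this page, but under the same conventions it would look roughly like this (the helper is illustrative):

import io
import os

def save_vocab(path, vocab):
    """Illustrative counterpart to the vocab property above."""
    with io.open(os.path.join(path, 'vocab.json'), 'w', encoding='utf-8') as out_file:
        out_file.write(vocab.to_json())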

github dmlc/gluon-nlp/scripts/conversion_tools/compare_tf_gluon_model.py
tensorflow_all_out_features.append(tensorflow_out_features)

    output_json['features'] = tensorflow_all_out_features
    tensorflow_all_out.append(output_json)

tf_outputs = [tensorflow_all_out[0]['features'][0]['layers'][t]['values'] for t in layer_indexes]

###############################################################################
#                               Gluon MODEL                                   #
###############################################################################

if args.gluon_parameter_file:
    assert args.gluon_vocab_file, \
        'Must specify --gluon_vocab_file when specifying --gluon_parameter_file'
    with open(args.gluon_vocab_file, 'r') as f:
        vocabulary = nlp.Vocab.from_json(f.read())
    bert, vocabulary = nlp.model.get_model(args.gluon_model,
                                           dataset_name=None,
                                           vocab=vocabulary,
                                           pretrained=not args.gluon_parameter_file,
                                           use_pooler=False,
                                           use_decoder=False,
                                           use_classifier=False)
    try:
        bert.cast('float16')
        bert.load_parameters(args.gluon_parameter_file, ignore_extra=True)
        bert.cast('float32')
    except AssertionError:
        bert.cast('float32')
        bert.load_parameters(args.gluon_parameter_file, ignore_extra=True)
else:
    assert not args.gluon_vocab_file, \

github dmlc/gluon-nlp/scripts/natural_language_inference/main.py
    json.dump(vars(args), open(os.path.join(args.output_dir, 'config.json'), 'w'))

    if args.gpu_id == -1:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu_id)

    mx.random.seed(args.seed, ctx=ctx)

    if args.mode == 'train':
        train_dataset = read_dataset(args, 'train_file')
        val_dataset = read_dataset(args, 'test_file')

        vocab_path = os.path.join(args.output_dir, 'vocab.jsons')
        if os.path.exists(vocab_path):
            vocab = nlp.Vocab.from_json(open(vocab_path).read())
        else:
            vocab = build_vocab(train_dataset)
            with open(vocab_path, 'w') as fout:
                fout.write(vocab.to_json())
        glove = nlp.embedding.create(args.embedding, source=args.embedding_source)
        vocab.set_embedding(glove)

        train_data_loader = prepare_data_loader(args, train_dataset, vocab)
        val_data_loader = prepare_data_loader(args, val_dataset, vocab, test=True)

        model = build_model(args, vocab)
        train_model(model, train_data_loader, val_data_loader, vocab.embedding, ctx, args)
    elif args.mode == 'test':
        model_args = argparse.Namespace(**json.load(
            open(os.path.join(args.model_dir, 'config.json'))))
        vocab = nlp.Vocab.from_json(