How to use the allennlp.models.model.Model class in allennlp

To help you get started, we’ve selected a few allennlp examples based on popular ways Model is used in public projects.

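Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: subclass Model, register it under a name, and have forward return a dictionary that includes a "loss" key during training. The name "toy_classifier" and the dimensions below are illustrative only, not taken from any of the projects that follow.

import torch
from allennlp.data import Vocabulary
from allennlp.models.model import Model


@Model.register("toy_classifier")  # illustrative name, used only for this sketch
class ToyClassifier(Model):
    def __init__(self, vocab: Vocabulary, input_dim: int = 16) -> None:
        super().__init__(vocab)
        # Size the output projection from the label namespace of the vocabulary.
        self._projection = torch.nn.Linear(input_dim, vocab.get_vocab_size("labels"))
        self._loss = torch.nn.CrossEntropyLoss()

    def forward(self, features: torch.Tensor, label: torch.Tensor = None) -> dict:
        logits = self._projection(features)
        output = {"logits": logits}
        if label is not None:
            # A "loss" key in the output dict is what the allennlp Trainer optimizes.
            output["loss"] = self._loss(logits, label)
        return output


# Usage sketch: build a vocabulary with two labels, then call the model directly.
vocab = Vocabulary()
vocab.add_tokens_to_namespace(["pos", "neg"], namespace="labels")
model = ToyClassifier(vocab)
outputs = model(torch.randn(4, 16), torch.tensor([0, 1, 1, 0]))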

github serrano-s / attn-tests / train_model.py View on Github external
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             for instance in dataset
             if key in datasets_for_vocab_creation)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    model = transfer_prev_model_weights_to_new_model(prev_best_model, model)

    # Initializing the model can have the side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
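
The snippet above builds the model with Model.from_params(vocab=vocab, params=params.pop('model')). Below is a minimal sketch of that call in isolation, reusing the illustrative "toy_classifier" registered earlier; the configuration keys are assumptions for the sketch, not taken from the attn-tests project.

from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.models.model import Model

# Hypothetical configuration; "type" must match a name passed to @Model.register.
params = Params({"model": {"type": "toy_classifier", "input_dim": 16}})

vocab = Vocabulary()
vocab.add_tokens_to_namespace(["pos", "neg"], namespace="labels")

# from_params looks up "type" in the Model registry and calls that class's
# constructor with the remaining keys, plus the vocabulary passed explicitly.
model = Model.from_params(vocab=vocab, params=params.pop("model"))
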
github allenai / vampire / vampire / models / vampire.py View on Github external
from allennlp.modules import TokenEmbedder
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.training.metrics import Average
from overrides import overrides
from scipy import sparse
from tabulate import tabulate

from vampire.common.util import (compute_background_log_frequency, load_sparse,
                                 read_json)
from vampire.modules import VAE

logger = logging.getLogger(__name__)


@Model.register("vampire")
class VAMPIRE(Model):
    """
    VAMPIRE is a variational document model for pretraining under low
    resource environments.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    bow_embedder : ``TextFieldEmbedder``, required
        Used to embed the ``tokens`` ``TextField`` we get as input to the model
        into a bag-of-word-counts.
    vae : ``VAE``, required
        The variational autoencoder used to project the BoW into a latent space.
    kl_weight_annealing : ``string``, required
        Annealing weight on the KL divergence of ELBO.
        Choice between `sigmoid`, `linear` and `constant` annealing.
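
The `sigmoid`, `linear` and `constant` choices for kl_weight_annealing in the docstring above correspond to simple schedules for the weight on the KL term of the ELBO. A rough sketch of what such schedules typically look like (the exact shapes and hyperparameters VAMPIRE uses may differ):

import math

def kl_weight(step: int, annealing: str = "sigmoid", total_steps: int = 1000) -> float:
    """Illustrative KL-weight schedules; not VAMPIRE's exact implementation."""
    if annealing == "constant":
        return 1.0
    if annealing == "linear":
        # Ramp linearly from 0 to 1 over the annealing window.
        return min(1.0, step / total_steps)
    if annealing == "sigmoid":
        # Ramp smoothly from ~0 to ~1, centered on the middle of the window.
        return 1.0 / (1.0 + math.exp(-10.0 * (step / total_steps - 0.5)))
    raise ValueError(f"unknown annealing choice: {annealing}")
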
github allenai / allennlp / allennlp / models / semantic_parsing / wikitables / wikitables_semantic_parser.py View on Github external
from allennlp.modules import (
    Embedding,
    Seq2SeqEncoder,
    Seq2VecEncoder,
    TextFieldEmbedder,
    TimeDistributed,
)
from allennlp.modules.seq2vec_encoders import BagOfEmbeddingsEncoder
from allennlp.nn import util
from allennlp.semparse import ParsingError
from allennlp.semparse.domain_languages.domain_language import ExecutionError
from allennlp.semparse.domain_languages import WikiTablesLanguage, START_SYMBOL
from allennlp.state_machines.states import GrammarBasedState, GrammarStatelet, RnnStatelet
from allennlp.training.metrics import Average


class WikiTablesSemanticParser(Model):
    """
    A ``WikiTablesSemanticParser`` is a :class:`Model` which takes as input a table and a question,
    and produces a logical form that answers the question when executed over the table.  The
    logical form is generated by a `type-constrained`, `transition-based` parser. This is an
    abstract class that defines most of the functionality related to the transition-based parser. It
    does not contain the implementation for actually training the parser. You may want to train it
    using a learning-to-search algorithm, in which case you will want to use
    ``WikiTablesErmSemanticParser``, or if you have a set of approximate logical forms that give the
    correct denotation, you will want to use ``WikiTablesMmlSemanticParser``.

    Parameters
    ----------
    vocab : ``Vocabulary``
    question_embedder : ``TextFieldEmbedder``
        Embedder for questions.
    action_embedding_dim : ``int``
github allenai / propara / propara / models / prolocal_model.py View on Github external
import numpy

from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import TextFieldEmbedder, Seq2SeqEncoder, Attention, TimeDistributed
from allennlp.nn import InitializerApplicator
from allennlp.nn.util import get_text_field_mask, weighted_sum
from allennlp.training.metrics import F1Measure, CategoricalAccuracy
from allennlp.modules.similarity_functions.bilinear import BilinearSimilarity
from allennlp.training.metrics import SpanBasedF1Measure
from allennlp.nn.util import sequence_cross_entropy_with_logits


@Model.register("ProLocalModel")
class ProLocalModel(Model):
    """
    This ``Model`` takes as input a dataset read by stateChangeDatasetReader
    Input: sentence, focus entity, focus verb
    Output: state change types for the focus entity, state change tags (mainly before, after locations of focus entity)
    The basic outline of this model is to
        1. get an embedded representation for the sentence tokens,
        2. concatenate each token embedding with the verb and entity bits,
        3. pass them through a bidirectional LSTM Seq2VecEncoder
           to create a contextual sentence embedding vector,
        4. apply bilinear attention to compute attention weights over the sentence tokens, and
        5. apply a dense layer to get the most likely state_change_type among
           {Create, Destroy, Move, None}

    Parameters
    ----------
    vocab : ``Vocabulary``
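
The five steps outlined in the docstring can be illustrated with plain tensors. The sketch below follows that outline with made-up shapes, a mean-pooled LSTM standing in for the Seq2VecEncoder, and torch.nn.Bilinear standing in for BilinearSimilarity; it is not the ProLocalModel implementation itself.

import torch
from allennlp.nn.util import weighted_sum

batch, seq_len, embedding_dim, hidden_dim = 2, 7, 16, 8

# Step 1: embedded sentence tokens (random placeholders here).
token_embeddings = torch.randn(batch, seq_len, embedding_dim)

# Step 2: concatenate each token embedding with verb and entity indicator bits.
verb_bits = torch.randint(0, 2, (batch, seq_len, 1)).float()
entity_bits = torch.randint(0, 2, (batch, seq_len, 1)).float()
encoder_input = torch.cat([token_embeddings, verb_bits, entity_bits], dim=-1)

# Step 3: a bidirectional LSTM; mean-pooling its outputs gives a sentence vector.
lstm = torch.nn.LSTM(embedding_dim + 2, hidden_dim, bidirectional=True, batch_first=True)
contextual, _ = lstm(encoder_input)
sentence_vector = contextual.mean(dim=1)

# Step 4: bilinear attention between the sentence vector and the token states.
bilinear = torch.nn.Bilinear(2 * hidden_dim, 2 * hidden_dim, 1)
expanded = sentence_vector.unsqueeze(1).expand_as(contextual).contiguous()
scores = bilinear(expanded, contextual.contiguous()).squeeze(-1)
attention = torch.softmax(scores, dim=-1)
attended = weighted_sum(contextual, attention)

# Step 5: a dense layer over the attended representation scores the four
# state-change types {Create, Destroy, Move, None}.
classifier = torch.nn.Linear(2 * hidden_dim, 4)
state_change_logits = classifier(attended)
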
github allenai / allennlp / allennlp / models / semantic_parsing / text2sql_parser.py View on Github external
from allennlp.modules import Attention, Seq2SeqEncoder, TextFieldEmbedder, Embedding
from allennlp.nn import util
from allennlp.nn.initializers import InitializerApplicator
from allennlp.nn.regularizers import RegularizerApplicator
from allennlp.semparse.contexts.sql_context_utils import action_sequence_to_sql
from allennlp.state_machines.states import GrammarBasedState
from allennlp.state_machines.transition_functions import BasicTransitionFunction
from allennlp.state_machines import BeamSearch
from allennlp.state_machines.trainers import MaximumMarginalLikelihood
from allennlp.state_machines.states import GrammarStatelet, RnnStatelet
from allennlp.training.metrics import Average

logger = logging.getLogger(__name__)


@Model.register("text2sql_parser")
class Text2SqlParser(Model):
    """
    Parameters
    ----------
    vocab : ``Vocabulary``
    utterance_embedder : ``TextFieldEmbedder``
        Embedder for utterances.
    action_embedding_dim : ``int``
        Dimension to use for action embeddings.
    encoder : ``Seq2SeqEncoder``
        The encoder to use for the input utterance.
    decoder_beam_search : ``BeamSearch``
        Beam search used to retrieve best sequences after training.
    max_decoding_steps : ``int``
        When we're decoding with a beam search, what's the maximum number of steps we should take?
        This only applies at evaluation time, not during training.
github plasticityai / magnitude / pymagnitude / third_party / allennlp / models / crf_tagger.py View on Github external
#overrides
import torch
from torch.nn.modules.linear import Linear

from allennlp.common.checks import check_dimensions_match
from allennlp.data import Vocabulary
from allennlp.modules import Seq2SeqEncoder, TimeDistributed, TextFieldEmbedder
from allennlp.modules import ConditionalRandomField, FeedForward
from allennlp.modules.conditional_random_field import allowed_transitions
from allennlp.models.model import Model
from allennlp.nn import InitializerApplicator, RegularizerApplicator
import allennlp.nn.util as util
from allennlp.training.metrics import SpanBasedF1Measure


class CrfTagger(Model):
    u"""
    The ``CrfTagger`` encodes a sequence of text with a ``Seq2SeqEncoder``,
    then uses a Conditional Random Field model to predict a tag for each token in the sequence.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the tokens ``TextField`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        The encoder that we will use in between embedding tokens and predicting output tags.
    label_namespace : ``str``, optional (default=``labels``)
        This is needed to compute the SpanBasedF1Measure metric.
        Unless you did something unusual, the default value should be what you want.
    feedforward : ``FeedForward``, optional, (default = None).
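
The core of the tagger described above is the ConditionalRandomField module: during training it scores the gold tag sequence, and at prediction time it Viterbi-decodes the best sequence. A minimal sketch with made-up tag logits (the real model produces them from its embedder and encoder):

import torch
from allennlp.modules import ConditionalRandomField

batch, seq_len, num_tags = 2, 5, 4
logits = torch.randn(batch, seq_len, num_tags)        # per-token tag scores
gold_tags = torch.randint(0, num_tags, (batch, seq_len))
mask = torch.ones(batch, seq_len, dtype=torch.bool)

crf = ConditionalRandomField(num_tags)

# Training: the CRF returns the log-likelihood of the gold sequence,
# so the loss is its negation.
loss = -crf(logits, gold_tags, mask)

# Prediction: Viterbi decoding returns the best tag sequence (and its score)
# for each instance in the batch.
best_paths = crf.viterbi_tags(logits, mask)
predicted_tags = [path for path, score in best_paths]
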
github allenai / allennlp-reading-comprehension / allennlp_rc / models / qanet.py View on Github external
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import Highway
from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder
from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
from allennlp.nn import util, InitializerApplicator, RegularizerApplicator
from allennlp.training.metrics import BooleanAccuracy, CategoricalAccuracy
from allennlp.nn.util import masked_softmax

from allennlp_rc.models.util import get_best_span
from allennlp_rc.eval import SquadEmAndF1


@Model.register("qanet")
class QaNet(Model):
    """
    This class implements Adams Wei Yu's QANet model for machine reading
    comprehension, published at ICLR 2018.

    The overall architecture of QANet is very similar to BiDAF. The main difference is that QANet
    replaces the RNN encoder with CNN + self-attention. There are also some minor differences in the
    modeling layer and output layer.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``question`` and ``passage`` ``TextFields`` we get as input to the model.
    num_highway_layers : ``int``
        The number of highway layers to use in between embedding the input and passing it through
        the phrase layer.
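
The output layer mentioned in the docstring reduces to picking the best (start, end) pair from the span logits, which is what the imported get_best_span helper does. A small illustration with random logits for a six-token passage (shapes only; the real logits come from the model encoder):

import torch
from allennlp_rc.models.util import get_best_span

span_start_logits = torch.randn(1, 6)   # (batch, passage_length)
span_end_logits = torch.randn(1, 6)

# Returns a (batch, 2) tensor of (start, end) token indices for the
# highest-scoring span with end >= start.
best_span = get_best_span(span_start_logits, span_end_logits)
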
github allenai / allennlp-semparse / allennlp_semparse / models / nlvr / nlvr_direct_semantic_parser.py View on Github external
from allennlp.models.model import Model
from allennlp.modules import Attention, TextFieldEmbedder, Seq2SeqEncoder
from allennlp.nn import Activation

from allennlp_semparse.domain_languages import NlvrLanguage
from allennlp_semparse.fields.production_rule_field import ProductionRule
from allennlp_semparse.models.nlvr.nlvr_semantic_parser import NlvrSemanticParser
from allennlp_semparse.state_machines import BeamSearch
from allennlp_semparse.state_machines.states import GrammarBasedState
from allennlp_semparse.state_machines.trainers import MaximumMarginalLikelihood
from allennlp_semparse.state_machines.transition_functions import BasicTransitionFunction

logger = logging.getLogger(__name__)


@Model.register("nlvr_direct_parser")
class NlvrDirectSemanticParser(NlvrSemanticParser):
    """
    ``NlvrDirectSemanticParser`` is an ``NlvrSemanticParser`` that gets around the problem of lack
    of logical form annotations by maximizing the marginal likelihood of an approximate set of target
    sequences that yield the correct denotation. The main difference between this parser and
    ``NlvrCoverageSemanticParser`` is that while this parser takes the output of an offline search
    process as the set of target sequences for training, the latter performs search during training.

    Parameters
    ----------
    vocab : ``Vocabulary``
        Passed to super-class.
    sentence_embedder : ``TextFieldEmbedder``
        Passed to super-class.
    action_embedding_dim : ``int``
        Passed to super-class.
github lyutyuh / acl19_subtagger / models / soft_dictionary_span_classifier_HSCRF.py View on Github external
from typing import Optional

from allennlp.data import Vocabulary
from allennlp.modules import (TextFieldEmbedder, Seq2SeqEncoder,
                              Seq2VecEncoder)
from allennlp.modules import ConditionalRandomField, FeedForward, Pruner, Highway
from allennlp.modules.conditional_random_field import allowed_transitions
import allennlp
from allennlp.modules.span_extractors import SelfAttentiveSpanExtractor, EndpointSpanExtractor

from allennlp.models.model import Model
from allennlp.nn import InitializerApplicator, RegularizerApplicator
import allennlp.nn.util as util
from allennlp.training.metrics import CategoricalAccuracy

from modules.span_based_chunker import SpanBasedChunker
from metrics.span_f1 import MySpanF1


@Model.register("soft_dictionary_span_classifier_HSCRF")
class soft_dictionary_span_classifier_HSCRF(Model):
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 feature_size: int,
                 max_span_width: int,
                 encoder: Seq2SeqEncoder,
                 span_label_namespace: str = "span_tags",
                 token_label_namespace: str = "token_tags",
                 calculate_span_f1: bool = None,
                 verbose_metrics: bool = True,
                 feedforward: Optional[FeedForward] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 class_weight=None) -> None:
        
        super().__init__(vocab, regularizer)
github mandarjoshi90 / pair2vec / endtasks / esim_pair2vec.py View on Github external
from torch.autograd import Variable
from torch.nn.functional import normalize
from allennlp.common import Params
from allennlp.common.checks import check_dimensions_match
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import FeedForward
from allennlp.modules import Seq2SeqEncoder, SimilarityFunction, TimeDistributed, TextFieldEmbedder
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.nn.util import get_text_field_mask, last_dim_softmax, weighted_sum, replace_masked_values
from allennlp.training.metrics import CategoricalAccuracy
from endtasks import util
from endtasks.modules import VariationalDropout


@Model.register("esim-pair2vec")
class ESIMPair2Vec(Model):
    """
    This ``Model`` implements the ESIM sequence model described in
    "Enhanced LSTM for Natural Language Inference" by Chen et al., 2017.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``premise`` and ``hypothesis`` ``TextFields`` we get as input to the
        model.
    attend_feedforward : ``FeedForward``
        This feedforward network is applied to the encoded sentence representations before the
        similarity matrix is computed between words in the premise and words in the hypothesis.
    similarity_function : ``SimilarityFunction``