How to use the nemo.backends.pytorch.nm.TrainableNM class in NeMo

To help you get started, we’ve selected a few NeMo examples based on popular ways TrainableNM is used in public projects.
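Every example on this page follows the same pattern: a TrainableNM subclass declares typed input and output ports in a static create_ports() method, builds its PyTorch layers in __init__ (after calling TrainableNM.__init__), and implements forward() like any torch.nn.Module. Below is a minimal sketch of that pattern; the SimpleClassifierNM module and its port shapes are hypothetical, for illustration only.

import torch.nn as nn

from nemo.backends.pytorch.nm import TrainableNM
from nemo.core.neural_types import NeuralType, AxisType, BatchTag, ChannelTag


class SimpleClassifierNM(TrainableNM):
    """Hypothetical module: projects features to class log-probabilities."""

    @staticmethod
    def create_ports():
        # NeMo uses these typed ports to check connections between modules.
        input_ports = {
            "features": NeuralType({0: AxisType(BatchTag),
                                    1: AxisType(ChannelTag)}),
        }
        output_ports = {
            "log_probs": NeuralType({0: AxisType(BatchTag),
                                     1: AxisType(ChannelTag)}),
        }
        return input_ports, output_ports

    def __init__(self, *, input_size, num_classes, **kwargs):
        TrainableNM.__init__(self, **kwargs)
        self.dense = nn.Linear(input_size, num_classes)
        self.log_softmax = nn.LogSoftmax(dim=-1)
        # Move parameters to the device NeMo assigned to this module.
        self.to(self._device)

    def forward(self, features):
        return self.log_softmax(self.dense(features))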


github NVIDIA / NeMo / collections / nemo_nlp / nemo_nlp / transformer_nm.py
        std_init_range = 1 / math.sqrt(params["d_model"])
        self.apply(
            lambda module: transformer_weights_init(module, std_init_range))
        self.to(self._device)

    def forward(self, input_ids_tgt, hidden_states_src, input_mask_src,
                input_mask_tgt):
        hidden_states_tgt = self.embedding_layer(input_ids_tgt)
        hidden_states = self.decoder(
            hidden_states_tgt, input_mask_tgt,
            hidden_states_src, input_mask_src)
        return hidden_states


class TransformerLogSoftmaxNM(TrainableNM):
    @staticmethod
    def create_ports():
        input_ports = {
            "hidden_states":
            NeuralType({
                0: AxisType(BatchTag),
                1: AxisType(TimeTag),
                2: AxisType(ChannelTag)
            }),
        }

        output_ports = {
            "log_probs":
            NeuralType({
                0: AxisType(BatchTag),
                1: AxisType(TimeTag),
github NVIDIA / NeMo / collections / nemo_nlp / nemo_nlp / bert.py
        TrainableNM.__init__(self, **kwargs)

        self.log_softmax = ClassificationLogSoftmax(
            hidden_size=d_model,
            num_classes=num_classes
        )

        self.log_softmax.apply(transformer_weights_init)
        self.log_softmax.to(self._device)

    def forward(self, hidden_states):
        log_probs = self.log_softmax(hidden_states)
        return log_probs


class NextSentencePredictionLossNM(TrainableNM):
    @staticmethod
    def create_ports():
        input_ports = {
            "log_probs":
            NeuralType({
                0: AxisType(BatchTag),
                1: AxisType(TimeTag),
                2: AxisType(ChannelTag)
            }),
            "labels":
            NeuralType({0: AxisType(BatchTag)}),
        }

        output_ports = {"loss": NeuralType(None)}
        return input_ports, output_ports
github NVIDIA / NeMo / collections / nemo_nlp / nemo_nlp / modules / transformer_nm.py
        self.to(self._device)

    def forward(self,
                input_ids_tgt,
                hidden_states_src,
                input_mask_src,
                input_mask_tgt):
        hidden_states_tgt = self.embedding_layer(input_ids_tgt)
        hidden_states = self.decoder(hidden_states_tgt,
                                     input_mask_tgt,
                                     hidden_states_src,
                                     input_mask_src)
        return hidden_states


class GreedyLanguageGeneratorNM(TrainableNM):
    """
    Neural module for greedy text generation with a language model.

    Args:
        decoder: module which maps input_ids into hidden_states
        log_softmax: module which maps hidden_states into log_probs
        max_seq_length: maximum allowed length of generated sequences
        pad_token: index of padding token in the vocabulary
        bos_token: index of beginning of sequence token in the vocabulary
        eos_token: index of end of sequence token in the vocabulary
        batch_size: size of the batch of generated sequences if no starting
            tokens are provided
    """

    @staticmethod
    def create_ports():
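Going by the docstring above, wiring up the generator might look like the following sketch; the decoder and log_softmax modules and the token-id values are placeholders, not taken from the source.

# Hypothetical wiring: decoder maps input_ids to hidden_states,
# log_softmax maps hidden_states to log_probs.
generator = GreedyLanguageGeneratorNM(
    decoder=decoder,
    log_softmax=log_softmax,
    max_seq_length=256,
    pad_token=0,
    bos_token=1,
    eos_token=2,
    batch_size=32,
)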
github NVIDIA / NeMo / collections / nemo_nlp / nemo_nlp / bert.py
output_ports = {"loss": NeuralType(None)}
        return input_ports, output_ports

    def __init__(self, *, num_inputs, **kwargs):
        kwargs["create_port_args"] = {"num_losses": num_inputs}
        LossNM.__init__(self, **kwargs)

    def _loss_function(self, **kwargs):
        values = [kwargs[x] for x in sorted(kwargs.keys())]
        # Accumulate the losses as tensors (not via .item()) so that
        # gradients flow through every term, not just the first one.
        loss = values[0]
        for loss_i in values[1:]:
            loss = loss.add(loss_i)
        return loss
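The snippet above appears to come from NeMo's loss-aggregator module (the class declaration is cut off). In a training graph it would be called on the NmTensors produced by individual loss modules; a hedged sketch, with the class and port names illustrative only:

# Hypothetical: mlm_loss and nsp_loss are NmTensors produced by two
# upstream loss modules; actual port names depend on create_port_args.
loss_agg = LossAggregatorNM(num_inputs=2)
total_loss = loss_agg(loss_1=mlm_loss, loss_2=nsp_loss)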


class TokenClassificationLoss(TrainableNM):
    @staticmethod
    def create_ports():
        input_ports = {
            "hidden_states":
            NeuralType({
                0: AxisType(BatchTag),
                1: AxisType(TimeTag),
                2: AxisType(ChannelTag)
            }),
            "labels":
            NeuralType({
                0: AxisType(BatchTag),
                1: AxisType(TimeTag)
            }),
            "input_mask":
            NeuralType({
github NVIDIA / NeMo / nemo / nemo / backends / pytorch / common / rnn.py
import random

import torch
# noinspection PyPep8Naming
import torch.nn.functional as F
from torch import nn

from nemo.backends.pytorch.common.parts import Attention
from nemo.backends.pytorch.nm import TrainableNM
from nemo.core.neural_types import NeuralType, AxisType, BatchTag, TimeTag, \
    ChannelTag
from nemo.utils.misc import pad_to


class DecoderRNN(TrainableNM):
    """Simple RNN-based decoder with attention.

    Args:
        voc_size (int): Total number of symbols in the vocabulary
        bos_id (int): Index of the start-of-sequence symbol in the vocabulary
        hidden_size (int): Size of the hidden vector used in the RNN
        attention_method (str): Attention method to pass to the
            `Attention` constructor.
            Defaults to 'general'.
        attention_type (str): When to apply attention. Can be one of
            ['post', 'none'].
            Defaults to 'post'.
        in_dropout (float): Embedding dropout value.
            Defaults to 0.2.
        gru_dropout (float): Dropout value between RNN layers.
            Defaults to 0.2.
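Based on the documented arguments, constructing the decoder might look like the sketch below; the values are illustrative, and since the docstring is truncated here, further arguments may exist.

# Hypothetical configuration for the decoder described above.
decoder = DecoderRNN(
    voc_size=10000,
    bos_id=1,
    hidden_size=256,
    attention_method='general',
    attention_type='post',
    in_dropout=0.2,
    gru_dropout=0.2,
)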
github NVIDIA / NeMo / nemo / nemo / backends / pytorch / common / other.py
           'TableLookUp2',
           'SequenceEmbedding',
           'SequenceProjection',
           'ZerosLikeNM']

from typing import Iterable, Optional, Mapping, Set, Dict

import torch
import torch.nn as nn

from nemo.backends.pytorch.nm import TrainableNM
from nemo.core import NeuralModule
from nemo.core.neural_types import *


class SimpleCombiner(TrainableNM):
    """Performs simple combination of two NmTensors. For example, it can
    perform x1 + x2.

    Args:
        mode (str): Can be one of ['add', 'sum', 'max'].
            Defaults to 'add'.

    """

    @staticmethod
    def create_ports():
        input_ports = {"x1": NeuralType({}), "x2": NeuralType({})}

        output_ports = {"combined": None}

        return input_ports, output_ports
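Used in a graph, SimpleCombiner is called like any other module on two NmTensors; a minimal sketch, assuming x1 and x2 were produced by upstream modules:

# Hypothetical: x1 and x2 are NmTensors from earlier modules in the graph.
combiner = SimpleCombiner(mode='add')
combined = combiner(x1=x1, x2=x2)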
github NVIDIA / NeMo / collections / nemo_nlp / nemo_nlp / bert.py
# Copyright (c) 2019 NVIDIA Corporation
"""
This package contains BERT Neural Module
"""
import torch
import torch.nn as nn

from nemo.backends.pytorch.nm import TrainableNM, LossNM
from nemo.core.neural_types import *
from .transformer import ClassificationLogSoftmax
from .transformer import SmoothedCrossEntropyLoss
from .transformer import SequenceClassificationLoss
from .transformer.utils import transformer_weights_init


class MaskedLanguageModelingLossNM(TrainableNM):
    @staticmethod
    def create_ports():
        input_ports = {
            "log_probs":
            NeuralType({
                0: AxisType(BatchTag),
                1: AxisType(TimeTag),
                2: AxisType(ChannelTag)
            }),
            "output_ids":
            NeuralType({
                0: AxisType(BatchTag),
                1: AxisType(TimeTag)
            }),
            "output_mask":
            NeuralType({
github NVIDIA / NeMo / collections / nemo_nlp / nemo_nlp / huggingface / bert.py
from pytorch_transformers import (BertConfig,
                                  BertModel,
                                  BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
                                  BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)

from nemo.backends.pytorch.nm import TrainableNM
from nemo.core.neural_modules import PretrainedModelInfo
from nemo.core.neural_types import (AxisType,
                                    BatchTag,
                                    ChannelTag,
                                    NeuralType,
                                    TimeTag)


class BERT(TrainableNM):
    """
    BERT wraps around the Huggingface implementation of BERT from their
    pytorch-transformers repository for easy use within NeMo.

    Args:
        pretrained_model_name (str): If using a pretrained model, this should
            be the model's name. Otherwise, should be left as None.
        vocab_size (int): Size of the vocabulary file, if not using a
            pretrained model.
        hidden_size (int): Size of the encoder and pooler layers.
        num_hidden_layers (int): Number of hidden layers in the encoder.
        num_attention_heads (int): Number of attention heads for each layer.
        intermediate_size (int): Size of intermediate layers in the encoder.
        hidden_act (str): Activation function for encoder and pooler layers;
            "gelu", "relu", and "swish" are supported.
        max_position_embeddings (int): The maximum number of tokens in a
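A typical way to use this wrapper is to load a pretrained checkpoint by name, or to configure a model from scratch with the sizes listed above. A hedged sketch; the checkpoint name and sizes are assumptions, not taken from the source.

# Load a pretrained checkpoint (any key from
# BERT_PRETRAINED_MODEL_ARCHIVE_MAP should work).
bert = BERT(pretrained_model_name='bert-base-uncased')

# Or configure an untrained model (sizes are illustrative).
bert_scratch = BERT(
    vocab_size=30522,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act='gelu',
)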
github NVIDIA / NeMo / collections / nemo_tts / nemo_tts / waveglow_modules.py
# Copyright (c) 2019 NVIDIA Corporation
import librosa
import numpy as np
import torch

from nemo.backends.pytorch.nm import TrainableNM, LossNM
from nemo.core.neural_types import *
from .parts.waveglow import WaveGlow


class WaveGlowNM(TrainableNM):
    """
    WaveGlowNM implements the full WaveGlow model. This NM is meant to
    be used during training.

    Args:
        n_mel_channels (int): Size of the input mel spectrogram.
            Defaults to 80.
        n_flows (int): Number of normalizing flows/layers of WaveGlow.
            Defaults to 12.
        n_group (int): Each audio/spec pair is split into n_group groups.
            It must be divisible by 2 as halves are split this way.
            Defaults to 8.
        n_early_every (int): After n_early_every layers, n_early_size number of
            groups are skipped to the output of the Neural Module.
            Defaults to 4.
        n_early_size (int): The number of groups to skip to the output at every
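Putting the documented defaults together, constructing the training module might look like the sketch below; the docstring is truncated above, so the n_early_size value and any remaining arguments are assumptions.

# Hypothetical instantiation using the defaults described above;
# n_early_size=2 is an assumed value.
waveglow = WaveGlowNM(
    n_mel_channels=80,
    n_flows=12,
    n_group=8,
    n_early_every=4,
    n_early_size=2,
)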