How to use the espnet.utils.cli_utils.strtobool function in espnet

To help you get started, we’ve selected a few espnet examples based on popular ways the function is used in public projects.

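In every snippet below, strtobool is passed as the type= callable of argparse's add_argument, so a boolean command-line flag accepts strings such as 'true'/'false' or '1'/'0' and stores a Python bool. Here is a minimal, standalone sketch of that pattern; the flag name is borrowed from the examples below, but the script itself is not part of espnet:

import argparse
from espnet.utils.cli_utils import strtobool

parser = argparse.ArgumentParser()
# strtobool converts the command-line string into a bool, so 'false' or '0' yields False.
parser.add_argument('--use-batch-norm', default=True, type=strtobool,
                    help='Whether to use batch normalization')

args = parser.parse_args(['--use-batch-norm', 'false'])
print(args.use_batch_norm)  # False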

Example from espnet/bin/st_train.py (espnet/espnet on GitHub)
type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
                        help='List of decoder modules to initialize, separated by a comma.')
    # multilingual related
    parser.add_argument('--multilingual', default=False, type=strtobool,
                        help='Prepend target language ID to the source sentence. \
                        Both source/target language IDs must be prepended in the pre-processing stage.')
    parser.add_argument('--replace-sos', default=False, type=strtobool,
                        help='Replace <sos> in the decoder with a target language ID \
                              (the first token in the target sequence)')
    # Feature transform: Normalization
    parser.add_argument('--stats-file', type=str, default=None,
                        help='The stats file for the feature normalization')
    parser.add_argument('--apply-uttmvn', type=strtobool, default=True,
                        help='Apply utterance level mean '
                             'variance normalization.')
    parser.add_argument('--uttmvn-norm-means', type=strtobool,
                        default=True, help='')
    parser.add_argument('--uttmvn-norm-vars', type=strtobool, default=False,
                        help='')
    # Feature transform: Fbank
    parser.add_argument('--fbank-fs', type=int, default=16000,
                        help='The sample frequency used for '
                             'the mel-fbank creation.')
    parser.add_argument('--n-mels', type=int, default=80,
                        help='The number of mel-frequency bins.')
    parser.add_argument('--fbank-fmin', type=float, default=0.,
                        help='')
    parser.add_argument('--fbank-fmax', type=float, default=None,
                        help='')
    return parser
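
Besides strtobool, the snippet above also uses a small type= lambda to turn a comma-separated string into a list of module prefixes. A quick standalone sketch (not espnet code) of what that conversion produces:

import argparse

parser = argparse.ArgumentParser()
# Same comma-splitting type= lambda as --dec-init-mods above.
parser.add_argument('--dec-init-mods', default='att., dec.',
                    type=lambda s: [str(mod) for mod in s.split(',') if s != ''])

args = parser.parse_args(['--dec-init-mods', 'att.,dec.embed.'])
print(args.dec_init_mods)  # ['att.', 'dec.embed.']

Note that argparse also runs a string default through type=, so leaving the flag unset here yields ['att.', ' dec.'] (the space after the comma is preserved).
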
Example from espnet/bin/st_train.py (espnet/espnet on GitHub)
parser.add_argument('--criterion', default='acc', type=str,
                        choices=['loss', 'acc'],
                        help='Criterion to perform epsilon decay')
    parser.add_argument('--threshold', default=1e-4, type=float,
                        help='Threshold to stop iteration')
    parser.add_argument('--epochs', '-e', default=30, type=int,
                        help='Maximum number of epochs')
    parser.add_argument('--early-stop-criterion', default='validation/main/acc', type=str, nargs='?',
                        help="Value to monitor to trigger an early stopping of the training")
    parser.add_argument('--patience', default=3, type=int, nargs='?',
                        help="Number of epochs to wait without improvement before stopping the training")
    parser.add_argument('--grad-clip', default=5, type=float,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--num-save-attention', default=3, type=int,
                        help='Number of samples of attention to be saved')
    parser.add_argument('--grad-noise', type=strtobool, default=False,
                        help='The flag to switch to use noise injection to gradients during training')
    # speech translation related
    parser.add_argument('--context-residual', default=False, type=strtobool, nargs='?',
                        help='The flag to switch to use context vector residual in the decoder network')
    # finetuning related
    parser.add_argument('--enc-init', default=None, type=str, nargs='?',
                        help='Pre-trained ASR model to initialize encoder.')
    parser.add_argument('--enc-init-mods', default='enc.enc.',
                        type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
                        help='List of encoder modules to initialize, separated by a comma.')
    parser.add_argument('--dec-init', default=None, type=str, nargs='?',
                        help='Pre-trained ASR, MT or LM model to initialize decoder.')
    parser.add_argument('--dec-init-mods', default='att., dec.',
                        type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
                        help='List of decoder modules to initialize, separated by a comma.')
    # multilingual related
Example from espnet/bin/asr_rnnt_train.py (espnet/espnet on GitHub)
parser.add_argument('--criterion', default='loss', type=str,
                        choices=['loss'],
                        help='Criterion to perform epsilon decay')
    parser.add_argument('--threshold', default=1e-4, type=float,
                        help='Threshold to stop iteration')
    parser.add_argument('--epochs', '-e', default=30, type=int,
                        help='Maximum number of epochs')
    parser.add_argument('--early-stop-criterion', default='validation/main/loss', type=str, nargs='?',
                        help="Value to monitor to trigger an early stopping of the training")
    parser.add_argument('--patience', default=3, type=int, nargs='?',
                        help="Number of epochs to wait without improvement before stopping the training")
    parser.add_argument('--grad-clip', default=5, type=float,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--num-save-attention', default=3, type=int,
                        help='Number of samples of attention to be saved')
    parser.add_argument('--grad-noise', type=strtobool, default=False,
                        help='The flag to switch to use noise injection to gradients during training')
    # finetuning related
    parser.add_argument('--enc-init', default=None, type=str,
                        help='Initialize encoder model part from pre-trained ESPNET ASR model.')
    parser.add_argument('--enc-init-mods', default='enc.enc.',
                        type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
                        help='List of encoder modules to initialize, separated by a comma.')
    parser.add_argument('--dec-init', default=None, type=str,
                        help='Initialize decoder model part from pre-trained ESPNET ASR or LM model.')
    parser.add_argument('--dec-init-mods', default='att.,dec.decoder.,dec.att.,dec.embed.',
                        type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
                        help='List of decoder modules to initialize, separated by a comma.')
    parser.add_argument('--freeze-modules', default='',
                        type=lambda s: [str(mod) for mod in s.split(',') if s != ''],
                        help='List of modules to freeze, separated by a comma.')
    # speech translation related
Example from espnet/bin/tts_train.py (espnet/espnet on GitHub)
help='Maximum bins in a minibatch (0 to disable)')
    parser.add_argument('--batch-frames-in', default=0, type=int,
                        help='Maximum input frames in a minibatch (0 to disable)')
    parser.add_argument('--batch-frames-out', default=0, type=int,
                        help='Maximum output frames in a minibatch (0 to disable)')
    parser.add_argument('--batch-frames-inout', default=0, type=int,
                        help='Maximum input+output frames in a minibatch (0 to disable)')
    parser.add_argument('--maxlen-in', '--batch-seq-maxlen-in', default=100, type=int, metavar='ML',
                        help='When --batch-count=seq, batch size is reduced if the input sequence length > ML.')
    parser.add_argument('--maxlen-out', '--batch-seq-maxlen-out', default=200, type=int, metavar='ML',
                        help='When --batch-count=seq, batch size is reduced if the output sequence length > ML')
    parser.add_argument('--num-iter-processes', default=0, type=int,
                        help='Number of processes of iterator')
    parser.add_argument('--preprocess-conf', type=str, default=None,
                        help='The configuration file for the pre-processing')
    parser.add_argument('--use-speaker-embedding', default=False, type=strtobool,
                        help='Whether to use speaker embedding')
    parser.add_argument('--use-second-target', default=False, type=strtobool,
                        help='Whether to use second target')
    # optimization related
    parser.add_argument('--opt', default='adam', type=str,
                        choices=['adam', 'noam'],
                        help='Optimizer')
    parser.add_argument('--accum-grad', default=1, type=int,
                        help='Number of gradient accumulation steps')
    parser.add_argument('--lr', default=1e-3, type=float,
                        help='Learning rate for optimizer')
    parser.add_argument('--eps', default=1e-6, type=float,
                        help='Epsilon for optimizer')
    parser.add_argument('--weight-decay', default=1e-6, type=float,
                        help='Weight decay coefficient for optimizer')
    parser.add_argument('--epochs', '-e', default=30, type=int,
Example from espnet/nets/pytorch_backend/e2e_tts_fastspeech.py (espnet/espnet on GitHub)
help="Number of postnet layers")
        group.add_argument("--postnet-chans", default=256, type=int,
                           help="Number of postnet channels")
        group.add_argument("--postnet-filts", default=5, type=int,
                           help="Filter size of postnet")
        group.add_argument("--use-batch-norm", default=True, type=strtobool,
                           help="Whether to use batch normalization")
        group.add_argument("--use-scaled-pos-enc", default=True, type=strtobool,
                           help="Use trainable scaled positional encoding instead of the fixed scale one")
        group.add_argument("--encoder-normalize-before", default=False, type=strtobool,
                           help="Whether to apply layer norm before encoder block")
        group.add_argument("--decoder-normalize-before", default=False, type=strtobool,
                           help="Whether to apply layer norm before decoder block")
        group.add_argument("--encoder-concat-after", default=False, type=strtobool,
                           help="Whether to concatenate attention layer's input and output in encoder")
        group.add_argument("--decoder-concat-after", default=False, type=strtobool,
                           help="Whether to concatenate attention layer's input and output in decoder")
        group.add_argument("--duration-predictor-layers", default=2, type=int,
                           help="Number of layers in duration predictor")
        group.add_argument("--duration-predictor-chans", default=384, type=int,
                           help="Number of channels in duration predictor")
        group.add_argument("--duration-predictor-kernel-size", default=3, type=int,
                           help="Kernel size in duration predictor")
        group.add_argument("--teacher-model", default=None, type=str, nargs="?",
                           help="Teacher model file path")
        group.add_argument("--reduction-factor", default=1, type=int,
                           help="Reduction factor")
        group.add_argument("--spk-embed-dim", default=None, type=int,
                           help="Number of speaker embedding dimensions")
        group.add_argument("--spk-embed-integration-type", type=str, default="add",
                           choices=["add", "concat"],
                           help="How to integrate speaker embedding")
Example from espnet/nets/pytorch_backend/e2e_tts_fastspeech.py (espnet/espnet on GitHub)
group.add_argument("--positionwise-layer-type", default="linear", type=str,
                           choices=["linear", "conv1d", "conv1d-linear"],
                           help="Positionwise layer type.")
        group.add_argument("--positionwise-conv-kernel-size", default=3, type=int,
                           help="Kernel size of positionwise conv1d layer")
        group.add_argument("--postnet-layers", default=0, type=int,
                           help="Number of postnet layers")
        group.add_argument("--postnet-chans", default=256, type=int,
                           help="Number of postnet channels")
        group.add_argument("--postnet-filts", default=5, type=int,
                           help="Filter size of postnet")
        group.add_argument("--use-batch-norm", default=True, type=strtobool,
                           help="Whether to use batch normalization")
        group.add_argument("--use-scaled-pos-enc", default=True, type=strtobool,
                           help="Use trainable scaled positional encoding instead of the fixed scale one")
        group.add_argument("--encoder-normalize-before", default=False, type=strtobool,
                           help="Whether to apply layer norm before encoder block")
        group.add_argument("--decoder-normalize-before", default=False, type=strtobool,
                           help="Whether to apply layer norm before decoder block")
        group.add_argument("--encoder-concat-after", default=False, type=strtobool,
                           help="Whether to concatenate attention layer's input and output in encoder")
        group.add_argument("--decoder-concat-after", default=False, type=strtobool,
                           help="Whether to concatenate attention layer's input and output in decoder")
        group.add_argument("--duration-predictor-layers", default=2, type=int,
                           help="Number of layers in duration predictor")
        group.add_argument("--duration-predictor-chans", default=384, type=int,
                           help="Number of channels in duration predictor")
        group.add_argument("--duration-predictor-kernel-size", default=3, type=int,
                           help="Kernel size in duration predictor")
        group.add_argument("--teacher-model", default=None, type=str, nargs="?",
                           help="Teacher model file path")
        group.add_argument("--reduction-factor", default=1, type=int,
Example from espnet/nets/pytorch_backend/e2e_asr_mulenc.py (espnet/espnet on GitHub)
def ctc_add_arguments(parser):
        """Add arguments for ctc in multi-encoder setting."""
        group = parser.add_argument_group("E2E multi-ctc setting")
        group.add_argument('--share-ctc', type=strtobool, default=False,
                           help='The flag to switch to share ctc across multiple encoders '
                                '(multi-encoder asr mode only).')
        group.add_argument('--weights-ctc-train', type=float, action='append',
                           help='ctc weight assigned to each encoder during training.')
        group.add_argument('--weights-ctc-dec', type=float, action='append',
                           help='ctc weight assigned to each encoder during decoding.')
        return parser
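
In the multi-encoder options above, --share-ctc is a strtobool flag, while the per-encoder CTC weights use action='append', so the option is repeated once per encoder. A hedged, standalone sketch of how the parsed values come out; the command-line values are illustrative, not espnet defaults:

import argparse
from espnet.utils.cli_utils import strtobool

parser = argparse.ArgumentParser()
parser.add_argument('--share-ctc', type=strtobool, default=False)
parser.add_argument('--weights-ctc-train', type=float, action='append')

args = parser.parse_args(['--share-ctc', 'true',
                          '--weights-ctc-train', '0.6',
                          '--weights-ctc-train', '0.4'])
print(args.share_ctc)          # True
print(args.weights_ctc_train)  # [0.6, 0.4] -- one weight per encoder
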
Example from espnet/nets/pytorch_backend/e2e_asr_transducer.py (espnet/espnet on GitHub)
help='Dropout rate for the decoder')
        # prediction
        group.add_argument('--dec-embed-dim', default=320, type=int,
                           help='Number of decoder embeddings dimensions')
        parser.add_argument('--dropout-rate-embed-decoder', default=0.0, type=float,
                            help='Dropout rate for the decoder embeddings')
        # general
        group.add_argument('--rnnt_type', default='warp-transducer', type=str,
                           choices=['warp-transducer'],
                           help='Type of transducer implementation to calculate loss.')
        parser.add_argument('--rnnt-mode', default='rnnt', type=str, choices=['rnnt', 'rnnt-att'],
                            help='RNN-Transducing mode')
        parser.add_argument('--joint-dim', default=320, type=int,
                            help='Number of dimensions in joint space')
        # decoding
        parser.add_argument('--score-norm-transducer', type=strtobool, nargs='?',
                            default=True,
                            help='Normalize transducer scores by length')
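
--score-norm-transducer combines strtobool with nargs='?', which is worth spelling out: the default applies when the flag is absent, an explicit 'true'/'false' value goes through strtobool, and a bare flag with no value falls back to const (None unless one is set). A standalone sketch, not taken from espnet:

import argparse
from espnet.utils.cli_utils import strtobool

parser = argparse.ArgumentParser()
parser.add_argument('--score-norm-transducer', type=strtobool, nargs='?',
                    default=True, help='Normalize transducer scores by length')

args = parser.parse_args([])
print(args.score_norm_transducer)   # True: the default is used when the flag is absent

args = parser.parse_args(['--score-norm-transducer', 'false'])
print(args.score_norm_transducer)   # False: 'false' went through strtobool

args = parser.parse_args(['--score-norm-transducer'])
print(args.score_norm_transducer)   # None: a bare flag with nargs='?' falls back to const
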
Example from espnet/nets/pytorch_backend/e2e_tts_fastspeech.py (espnet/espnet on GitHub)
group.add_argument("--dlayers", default=6, type=int,
                           help="Number of decoder layers")
        group.add_argument("--dunits", default=1536, type=int,
                           help="Number of decoder hidden units")
        group.add_argument("--positionwise-layer-type", default="linear", type=str,
                           choices=["linear", "conv1d", "conv1d-linear"],
                           help="Positionwise layer type.")
        group.add_argument("--positionwise-conv-kernel-size", default=3, type=int,
                           help="Kernel size of positionwise conv1d layer")
        group.add_argument("--postnet-layers", default=0, type=int,
                           help="Number of postnet layers")
        group.add_argument("--postnet-chans", default=256, type=int,
                           help="Number of postnet channels")
        group.add_argument("--postnet-filts", default=5, type=int,
                           help="Filter size of postnet")
        group.add_argument("--use-batch-norm", default=True, type=strtobool,
                           help="Whether to use batch normalization")
        group.add_argument("--use-scaled-pos-enc", default=True, type=strtobool,
                           help="Use trainable scaled positional encoding instead of the fixed scale one")
        group.add_argument("--encoder-normalize-before", default=False, type=strtobool,
                           help="Whether to apply layer norm before encoder block")
        group.add_argument("--decoder-normalize-before", default=False, type=strtobool,
                           help="Whether to apply layer norm before decoder block")
        group.add_argument("--encoder-concat-after", default=False, type=strtobool,
                           help="Whether to concatenate attention layer's input and output in encoder")
        group.add_argument("--decoder-concat-after", default=False, type=strtobool,
                           help="Whether to concatenate attention layer's input and output in decoder")
        group.add_argument("--duration-predictor-layers", default=2, type=int,
                           help="Number of layers in duration predictor")
        group.add_argument("--duration-predictor-chans", default=384, type=int,
                           help="Number of channels in duration predictor")
        group.add_argument("--duration-predictor-kernel-size", default=3, type=int,
Example from espnet/bin/asr_train.py (espnet/espnet on GitHub)
parser.add_argument('--bprojs', type=int, default=300,
                        help='')
    parser.add_argument('--badim', type=int, default=320,
                        help='')
    parser.add_argument('--bnmask', type=int, default=2,
                        help='Number of beamforming masks, '
                             'default is 2 for [speech, noise].')
    parser.add_argument('--ref-channel', type=int, default=-1,
                        help='The reference channel used for beamformer. '
                             'By default, the channel is estimated by DNN.')
    parser.add_argument('--bdropout-rate', type=float, default=0.0,
                        help='')
    # Feature transform: Normalization
    parser.add_argument('--stats-file', type=str, default=None,
                        help='The stats file for the feature normalization')
    parser.add_argument('--apply-uttmvn', type=strtobool, default=True,
                        help='Apply utterance level mean '
                             'variance normalization.')
    parser.add_argument('--uttmvn-norm-means', type=strtobool,
                        default=True, help='')
    parser.add_argument('--uttmvn-norm-vars', type=strtobool, default=False,
                        help='')
    # Feature transform: Fbank
    parser.add_argument('--fbank-fs', type=int, default=16000,
                        help='The sample frequency used for '
                             'the mel-fbank creation.')
    parser.add_argument('--n-mels', type=int, default=80,
                        help='The number of mel-frequency bins.')
    parser.add_argument('--fbank-fmin', type=float, default=0.,
                        help='')
    parser.add_argument('--fbank-fmax', type=float, default=None,
                        help='')