How to use the sockeye.constants module in sockeye

To help you get started, we’ve selected a few examples that show popular ways sockeye.constants is used in public projects.

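Throughout these examples, sockeye.constants is imported under the alias C, so shared symbol names, numeric defaults, parameter prefixes, and lists of valid CLI choices can be referenced tersely. A minimal sketch of that convention (the constants shown here appear in the excerpts below, but their exact values can differ between sockeye versions):

import sockeye.constants as C

# The constants module holds plain module-level values: special-symbol
# strings, their vocabulary ids, layout strings, and argparse choice lists.
print(C.BOS_SYMBOL, C.EOS_SYMBOL)   # sentence-start / sentence-end markers
print(C.PAD_ID)                     # id reserved for padding
print(C.TIME_MAJOR)                 # layout string used by the RNN encoders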

awslabs / sockeye / sockeye / encoder.py (View on GitHub)
    def __init__(self,
                 rnn_config: rnn.RNNConfig,
                 prefix=C.BIDIRECTIONALRNN_PREFIX,
                 layout=C.TIME_MAJOR,
                 encoder_class: Callable = RecurrentEncoder) -> None:
        utils.check_condition(rnn_config.num_hidden % 2 == 0,
                              "num_hidden must be a multiple of 2 for BiDirectionalRNNEncoders.")
        super().__init__(rnn_config.dtype)
        self.rnn_config = rnn_config
        self.internal_rnn_config = rnn_config.copy(num_hidden=rnn_config.num_hidden // 2)
        if layout[0] == 'N':
            logger.warning("Batch-major layout for encoder input. Consider using time-major layout for faster speed")

        # time-major layout as _encode needs to swap layout for SequenceReverse
        self.forward_rnn = encoder_class(rnn_config=self.internal_rnn_config,
                                         prefix=prefix + C.FORWARD_PREFIX,
                                         layout=C.TIME_MAJOR)
        self.reverse_rnn = encoder_class(rnn_config=self.internal_rnn_config,
                                         prefix=prefix + C.REVERSE_PREFIX,
                                         layout=C.TIME_MAJOR)
        self.layout = layout
        self.prefix = prefix
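
In this constructor, C supplies both the shared parameter-name prefixes (C.BIDIRECTIONALRNN_PREFIX, C.FORWARD_PREFIX, C.REVERSE_PREFIX) and the layout string C.TIME_MAJOR. A rough sketch of the same prefix-and-layout pattern in isolation, assuming the layout constants follow the usual MXNet convention of strings such as "TNC" and "NTC":

import sockeye.constants as C

def describe_layout(layout: str) -> str:
    # Layout strings follow MXNet conventions: 'T' = time, 'N' = batch, 'C' = channels.
    return "batch-major" if layout[0] == 'N' else "time-major"

# The forward and reverse sub-encoders reuse the bidirectional prefix so that
# their parameters share one namespace in the saved model.
forward_prefix = C.BIDIRECTIONALRNN_PREFIX + C.FORWARD_PREFIX
reverse_prefix = C.BIDIRECTIONALRNN_PREFIX + C.REVERSE_PREFIX
print(describe_layout(C.TIME_MAJOR), forward_prefix, reverse_prefix)
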
awslabs / sockeye / test / integration / test_seq_copy_int.py (View on GitHub)
def _test_parameter_averaging(model_path: str):
    """
    Runs parameter averaging with all available strategies
    """
    for strategy in C.AVERAGE_CHOICES:
        points = sockeye.average.find_checkpoints(model_path=model_path,
                                                  size=4,
                                                  strategy=strategy,
                                                  metric=C.PERPLEXITY)
        assert len(points) > 0
        averaged_params = sockeye.average.average(points)
        assert averaged_params
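
The test loops over C.AVERAGE_CHOICES, the strategies accepted by sockeye's checkpoint-averaging tool, and selects checkpoints by C.PERPLEXITY. A hedged sketch of the same calls against a trained model directory (the path and the number of checkpoints are placeholders):

import sockeye.average
import sockeye.constants as C

model_path = "path/to/trained/model"  # placeholder: any sockeye training output directory
points = sockeye.average.find_checkpoints(model_path=model_path,
                                          size=4,
                                          strategy=C.AVERAGE_CHOICES[0],
                                          metric=C.PERPLEXITY)
averaged_params = sockeye.average.average(points)
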
awslabs / sockeye / sockeye / arguments.py (View on GitHub)
"Use any of 'dist_sync', 'dist_device_sync' and 'dist_async' for distributed "
                                   "training. Default: %(default)s.")
    train_params.add_argument("--gradient-compression-type",
                              type=str,
                              default=C.GRADIENT_COMPRESSION_NONE,
                              choices=C.GRADIENT_COMPRESSION_TYPES,
                              help='Type of gradient compression to use. Default: %(default)s.')
    train_params.add_argument("--gradient-compression-threshold",
                              type=float,
                              default=0.5,
                              help="Threshold for gradient compression if --gctype is '2bit'. Default: %(default)s.")

    train_params.add_argument('--weight-init',
                              type=str,
                              default=C.INIT_XAVIER,
                              choices=C.INIT_TYPES,
                              help='Type of base weight initialization. Default: %(default)s.')
    train_params.add_argument('--weight-init-scale',
                              type=float,
                              default=3.0,
                              help='Weight initialization scale. Applies to uniform (scale) and xavier (magnitude). '
                                   'Default: %(default)s.')
    train_params.add_argument('--weight-init-xavier-factor-type',
                              type=str,
                              default=C.INIT_XAVIER_FACTOR_TYPE_AVG,
                              choices=C.INIT_XAVIER_FACTOR_TYPES,
                              help='Xavier factor type. Default: %(default)s.')
    train_params.add_argument('--weight-init-xavier-rand-type',
                              type=str,
                              default=C.RAND_TYPE_UNIFORM,
                              choices=[C.RAND_TYPE_UNIFORM, C.RAND_TYPE_GAUSSIAN],
                              help='Xavier random number generator type. Default: %(default)s.')
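
Every default and choices list in this argument group comes from the constants module, which keeps the CLI flags, the saved configs, and the training code in agreement. A minimal sketch of the pattern with a standalone parser, reusing the same constants shown above:

import argparse
import sockeye.constants as C

parser = argparse.ArgumentParser()
parser.add_argument('--weight-init',
                    type=str,
                    default=C.INIT_XAVIER,
                    choices=C.INIT_TYPES,
                    help='Type of base weight initialization. Default: %(default)s.')
args = parser.parse_args(['--weight-init', C.INIT_XAVIER])
print(args.weight_init)
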
awslabs / sockeye / sockeye / training.py (View on GitHub)
        if os.path.exists(params_file):
            os.unlink(params_file)
        os.symlink(os.path.join("..", params_base_fname), params_file)

        # (2) Optimizer states
        opt_state_fname = os.path.join(training_state_dirname, C.OPT_STATES_LAST)
        self.model.save_optimizer_states(opt_state_fname)

        # (3) Data iterator
        train_iter.save_state(os.path.join(training_state_dirname, C.BUCKET_ITER_STATE_NAME))

        # (4) Random generators
        # RNG states: python's random and np.random provide functions for
        # storing the state, mxnet does not, but inside our code mxnet's RNG is
        # not used AFAIK
        with open(os.path.join(training_state_dirname, C.RNG_STATE_NAME), "wb") as fp:
            pickle.dump(random.getstate(), fp)
            pickle.dump(np.random.get_state(), fp)

        # (5) Training state
        self.state.save(os.path.join(training_state_dirname, C.TRAINING_STATE_NAME))

        # (6) Learning rate scheduler
        with open(os.path.join(training_state_dirname, C.SCHEDULER_STATE_NAME), "wb") as fp:
            pickle.dump(self.optimizer_config.lr_scheduler, fp)

        # First we rename the existing directory to minimize the risk of state
        # loss if the process is aborted during deletion (which will be slower
        # than directory renaming)
        delete_training_state_dirname = os.path.join(self.model.output_dir, C.TRAINING_STATE_TEMP_DELETENAME)
        if os.path.exists(self.training_state_dirname):
            os.rename(self.training_state_dirname, delete_training_state_dirname)
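
Step (4) pickles Python's and NumPy's RNG states into the file named by C.RNG_STATE_NAME, in that order. Restoring them when training resumes is the mirror image; a sketch, with the state directory as a placeholder:

import os
import pickle
import random
import numpy as np
import sockeye.constants as C

training_state_dirname = "model/training_state"  # placeholder path
with open(os.path.join(training_state_dirname, C.RNG_STATE_NAME), "rb") as fp:
    random.setstate(pickle.load(fp))      # Python's RNG, restored in the order it was saved
    np.random.set_state(pickle.load(fp))  # then NumPy's RNG
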
awslabs / sockeye / sockeye / scoring.py (View on GitHub)
    def __init__(self,
                 model: ScoringModel,
                 source_vocabs: List[vocab.Vocab],
                 target_vocab: vocab.Vocab,
                 constant_length_ratio: float = -1.0) -> None:
        self.source_vocab_inv = vocab.reverse_vocab(source_vocabs[0])
        self.target_vocab_inv = vocab.reverse_vocab(target_vocab)
        self.model = model
        self.exclude_list = {source_vocabs[0][C.BOS_SYMBOL], target_vocab[C.EOS_SYMBOL], C.PAD_ID}
        self.constant_length_ratio = constant_length_ratio
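
The exclude_list gathers the vocabulary ids of the special symbols (BOS in the source vocabulary, EOS in the target vocabulary, and C.PAD_ID) so they can be skipped when ids are mapped back to tokens. A toy sketch of how such a set is typically applied; the vocabulary here is made up, and the "<pad>" literal stands in for sockeye's padding symbol:

import sockeye.constants as C

# Toy target vocabulary: symbol -> id, plus its inverse for readout.
target_vocab = {"<pad>": C.PAD_ID, C.BOS_SYMBOL: 1, C.EOS_SYMBOL: 2, "hello": 3, "world": 4}
target_vocab_inv = {i: s for s, i in target_vocab.items()}

exclude = {target_vocab[C.BOS_SYMBOL], target_vocab[C.EOS_SYMBOL], C.PAD_ID}
token_ids = [1, 3, 4, 2, C.PAD_ID]
tokens = [target_vocab_inv[i] for i in token_ids if i not in exclude]
print(tokens)  # ['hello', 'world']
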
awslabs / sockeye / sockeye / rnn_attention.py (View on GitHub)
Returns context vector and attention probabilities
    via a weighted sum over values.

    :param values: Shape: (batch_size, seq_len, encoder_num_hidden).
    :param length: Shape: (batch_size,).
    :param logits: Shape: (batch_size, seq_len, 1).
    :param dtype: data type.
    :return: context: (batch_size, encoder_num_hidden), attention_probs: (batch_size, seq_len).
    """
    # masks attention scores according to sequence length.
    # (batch_size, seq_len, 1)
    logits = mx.sym.SequenceMask(data=logits,
                                 axis=1,
                                 use_sequence_length=True,
                                 sequence_length=length,
                                 value=-C.LARGE_VALUES[dtype])

    # (batch_size, seq_len, 1)
    probs = mx.sym.softmax(logits, axis=1, name='attention_softmax')

    # batch_dot: (batch, M, K) X (batch, K, N) -> (batch, M, N).
    # (batch_size, seq_len, num_hidden) X (batch_size, seq_len, 1) -> (batch_size, num_hidden, 1)
    context = mx.sym.batch_dot(lhs=values, rhs=probs, transpose_a=True)
    # (batch_size, encoder_num_hidden, 1)-> (batch_size, encoder_num_hidden)
    context = mx.sym.reshape(data=context, shape=(0, 0))
    probs = mx.sym.reshape(data=probs, shape=(0, 0))

    return context, probs
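
C.LARGE_VALUES maps a dtype name to a value large enough that masked positions receive effectively zero probability after the softmax. A NumPy-only sketch of the same masking idea; the LARGE_VALUES dict below is an assumed stand-in for sockeye's actual constant, and its values are illustrative:

import numpy as np

LARGE_VALUES = {"float32": 1.0e8, "float16": 1.0e4}  # assumed stand-in for C.LARGE_VALUES

def masked_softmax(logits: np.ndarray, lengths: np.ndarray, dtype: str = "float32") -> np.ndarray:
    # logits: (batch_size, seq_len); lengths: (batch_size,) valid positions per row
    seq_len = logits.shape[1]
    pad_mask = np.arange(seq_len)[None, :] >= lengths[:, None]   # True where position is padding
    masked = np.where(pad_mask, -LARGE_VALUES[dtype], logits)
    exp = np.exp(masked - masked.max(axis=1, keepdims=True))
    return exp / exp.sum(axis=1, keepdims=True)

probs = masked_softmax(np.ones((2, 4), dtype=np.float32), np.array([2, 4]))
print(probs.round(2))  # padded positions get ~0 probability
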
Cartus / DCGCN / sockeye / arguments.py (View on GitHub)
help='Initial learning rate. Default: %(default)s.')
    train_params.add_argument('--weight-decay',
                              type=float,
                              default=0.0,
                              help='Weight decay constant. Default: %(default)s.')
    train_params.add_argument('--momentum',
                              type=float,
                              default=None,
                              help='Momentum constant. Default: %(default)s.')
    train_params.add_argument('--gradient-clipping-threshold',
                              type=float,
                              default=1.0,
                              help='Clip absolute gradient values greater than this value. '
                                   'Set to negative to disable. Default: %(default)s.')
    train_params.add_argument('--gradient-clipping-type',
                              choices=C.GRADIENT_CLIPPING_TYPES,
                              default=C.GRADIENT_CLIPPING_TYPE_NONE,
                              help='The type of gradient clipping. Default: %(default)s.')

    train_params.add_argument('--learning-rate-scheduler-type',
                              default=C.LR_SCHEDULER_PLATEAU_REDUCE,
                              choices=C.LR_SCHEDULERS,
                              help='Learning rate scheduler type. Default: %(default)s.')
    train_params.add_argument('--learning-rate-reduce-factor',
                              type=float,
                              default=0.7,
                              help="Factor to multiply learning rate with "
                                   "(for 'plateau-reduce' learning rate scheduler). Default: %(default)s.")
    train_params.add_argument('--learning-rate-reduce-num-not-improved',
                              type=int,
                              default=8,
                              help="For 'plateau-reduce' learning rate scheduler. Adjust learning rate "
awslabs / sockeye / sockeye / train.py (View on GitHub)
:return: The data iterators (train, validation, config_data) as well as the source and target vocabularies.
    """
    num_words_source, num_words_target = args.num_words
    num_words_source = num_words_source if num_words_source > 0 else None
    num_words_target = num_words_target if num_words_target > 0 else None

    word_min_count_source, word_min_count_target = args.word_min_count
    batch_num_devices = 1 if args.use_cpu else sum(-di if di < 0 else 1 for di in args.device_ids)
    batch_by_words = args.batch_type == C.BATCH_TYPE_WORD

    validation_sources = [args.validation_source] + args.validation_source_factors
    validation_sources = [str(os.path.abspath(source)) for source in validation_sources]
    validation_target = str(os.path.abspath(args.validation_target))

    either_raw_or_prepared_error_msg = "Either specify a raw training corpus with %s and %s or a preprocessed corpus " \
                                       "with %s." % (C.TRAINING_ARG_SOURCE,
                                                     C.TRAINING_ARG_TARGET,
                                                     C.TRAINING_ARG_PREPARED_DATA)
    if args.prepared_data is not None:
        utils.check_condition(args.source is None and args.target is None, either_raw_or_prepared_error_msg)
        if not resume_training:
            utils.check_condition(args.source_vocab is None and args.target_vocab is None,
                                  "You are using a prepared data folder, which is tied to a vocabulary. "
                                  "To change it you need to rerun data preparation with a different vocabulary.")
        train_iter, validation_iter, data_config, source_vocabs, target_vocab = data_io.get_prepared_data_iters(
            prepared_data_dir=args.prepared_data,
            validation_sources=validation_sources,
            validation_target=validation_target,
            shared_vocab=shared_vocab,
            batch_size=args.batch_size,
            batch_by_words=batch_by_words,
            batch_num_devices=batch_num_devices)
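
Two details worth noting here: C.BATCH_TYPE_WORD switches batching from counting sentences to counting words, and device ids use the convention that a negative entry -n means "acquire any n GPUs", which is why the snippet sums -di for negative values. The device-count computation restated on its own:

def count_batch_devices(use_cpu: bool, device_ids) -> int:
    # Positive ids name specific GPUs; a negative id -n means "any n GPUs".
    if use_cpu:
        return 1
    return sum(-di if di < 0 else 1 for di in device_ids)

print(count_batch_devices(False, [0, 1]))  # 2 explicitly named GPUs
print(count_batch_devices(False, [-4]))    # any 4 GPUs
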
awslabs / sockeye / sockeye / inference.py (View on GitHub)
beam_histories[sent]["predicted_tokens"].append([self.vocab_target_inv[x] for x in
                                                                         best_word_indices_sent])
                        # for later sentences in the matrix, shift from e.g. [5, 6, 7, 8, 6] to [0, 1, 3, 4, 1]
                        shifted_parents = best_hyp_indices[rows] - (sent * self.beam_size)
                        beam_histories[sent]["parent_ids"].append(shifted_parents.asnumpy().tolist())

                        beam_histories[sent]["scores"].append(unnormalized_scores[rows].asnumpy().flatten().tolist())
                        beam_histories[sent]["normalized_scores"].append(
                            normalized_scores[rows].asnumpy().flatten().tolist())

            # Collect best hypotheses, best word indices, and attention scores
            best_hyp_indices_list.append(best_hyp_indices)
            best_word_indices_list.append(best_word_indices)
            attentions.append(attention_scores)

            if self.beam_search_stop == C.BEAM_SEARCH_STOP_FIRST:
                at_least_one_finished = finished.reshape((batch_size, self.beam_size)).sum(axis=1) > 0
                if at_least_one_finished.sum().asscalar() == batch_size:
                    break
            else:
                if finished.sum().asscalar() == batch_size * self.beam_size:  # all finished
                    break

            # (9) update models' state with winning hypotheses (ascending)
            for ms in model_states:
                ms.sort_state(best_hyp_indices)

        logger.debug("Finished after %d / %d steps.", t + 1, max_output_length)

        # (9) Sort the hypotheses within each sentence (normalization for finished hyps may have unsorted them).
        folded_accumulated_scores = scores_accumulated.reshape((batch_size,
                                                                self.beam_size * scores_accumulated.shape[-1]))
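
C.BEAM_SEARCH_STOP_FIRST stops decoding as soon as every sentence in the batch has at least one finished hypothesis, while the default waits until every beam entry is finished. The two stopping tests from the loop above, restated with NumPy (sockeye itself works on MXNet NDArrays):

import numpy as np

batch_size, beam_size = 2, 3
# 1 marks a finished hypothesis; shape (batch_size * beam_size,), as in the decoder loop.
finished = np.array([1, 0, 0, 0, 0, 1])

at_least_one_finished = finished.reshape((batch_size, beam_size)).sum(axis=1) > 0
stop_first = at_least_one_finished.sum() == batch_size       # C.BEAM_SEARCH_STOP_FIRST condition
stop_all = finished.sum() == batch_size * beam_size          # default: wait for all hypotheses
print(stop_first, stop_all)  # True False
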
awslabs / sockeye / sockeye / arguments.py (View on GitHub)
help='Weight initialization scale. Applies to uniform (scale) and xavier (magnitude). '
                                   'Default: %(default)s.')
    train_params.add_argument('--weight-init-xavier-factor-type',
                              type=str,
                              default=C.INIT_XAVIER_FACTOR_TYPE_AVG,
                              choices=C.INIT_XAVIER_FACTOR_TYPES,
                              help='Xavier factor type. Default: %(default)s.')
    train_params.add_argument('--weight-init-xavier-rand-type',
                              type=str,
                              default=C.RAND_TYPE_UNIFORM,
                              choices=[C.RAND_TYPE_UNIFORM, C.RAND_TYPE_GAUSSIAN],
                              help='Xavier random number generator type. Default: %(default)s.')
    train_params.add_argument('--embed-weight-init',
                              type=str,
                              default=C.EMBED_INIT_DEFAULT,
                              choices=C.EMBED_INIT_TYPES,
                              help='Type of embedding matrix weight initialization. If normal, initializes embedding '
                                   'weights using a normal distribution with std=1/sqrt(vocab_size). '
                                   'Default: %(default)s.')
    train_params.add_argument('--initial-learning-rate',
                              type=float,
                              default=0.0002,
                              help='Initial learning rate. Default: %(default)s.')
    train_params.add_argument('--weight-decay',
                              type=float,
                              default=0.0,
                              help='Weight decay constant. Default: %(default)s.')
    train_params.add_argument('--momentum',
                              type=float,
                              default=None,
                              help='Momentum constant. Default: %(default)s.')
    train_params.add_argument('--gradient-clipping-threshold',