How to use the sockeye.encoder.EmbeddingConfig class in sockeye

To help you get started, we've selected a few sockeye.encoder.EmbeddingConfig examples, based on popular ways it is used in public projects.

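In sockeye's MXNet-based 1.x codebase, encoder.EmbeddingConfig is the configuration object that describes an embedding layer: its vocabulary size, its embedding dimensionality, its dropout rate, and, on the source side, optional factor embeddings. As a minimal illustrative sketch, assuming the 1.x API used in the snippets below (the sizes here are made up, not taken from the quoted projects):

from sockeye import encoder

# An embedding over a 32,000-word vocabulary, mapping each token to a
# 512-dimensional vector, with 10% embedding dropout during training.
config_embed = encoder.EmbeddingConfig(vocab_size=32000,
                                       num_embed=512,
                                       dropout=0.1)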

Cartus / DCGCN / sockeye / train.py (View on GitHub)
    # config_encoder, encoder_num_hidden = create_encoder_config(args, max_seq_len_source, max_seq_len_target,
    #                                                            config_conv)

    # GRN
    config_encoder, encoder_num_hidden = create_graph_encoder_config(args, edge_vocab_size, max_seq_len_source)

    config_decoder = create_decoder_config(args, encoder_num_hidden, max_seq_len_source, max_seq_len_target)

    source_factor_configs = None
    if len(source_vocab_sizes) > 1:
        source_factor_configs = [encoder.FactorConfig(size, dim) for size, dim in zip(source_factor_vocab_sizes,
                                                                                      args.source_factors_num_embed)]

    config_embed_source = encoder.EmbeddingConfig(vocab_size=source_vocab_size,
                                                  num_embed=num_embed_source,
                                                  dropout=embed_dropout_source,
                                                  factor_configs=source_factor_configs)

    config_embed_target = encoder.EmbeddingConfig(vocab_size=target_vocab_size,
                                                  num_embed=num_embed_target,
                                                  dropout=embed_dropout_target)

    config_loss = loss.LossConfig(name=args.loss,
                                  vocab_size=target_vocab_size,
                                  normalization_type=args.loss_normalization_type,
                                  label_smoothing=args.label_smoothing)

    model_config = model.ModelConfig(config_data=config_data,
                                     vocab_source_size=source_vocab_size,
                                     vocab_target_size=target_vocab_size,
                                     config_embed_source=config_embed_source,
                                     config_embed_target=config_embed_target,
                                     config_encoder=config_encoder,
                                     config_decoder=config_decoder,
                                     config_loss=config_loss,
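
The DCGCN snippet above wires EmbeddingConfig into a full model configuration and, when more than one source vocabulary is present, attaches one encoder.FactorConfig per extra source factor. A pared-down sketch of just that pattern, with hypothetical vocabulary and embedding sizes (none of these numbers come from DCGCN):

from sockeye import encoder

# One extra source factor (say, POS tags): a 40-entry factor vocabulary
# embedded in 8 dimensions. FactorConfig is constructed positionally here,
# exactly as in the snippet above: (vocabulary size, embedding size).
source_factor_configs = [encoder.FactorConfig(40, 8)]

config_embed_source = encoder.EmbeddingConfig(vocab_size=30000,
                                              num_embed=512,
                                              dropout=0.3,
                                              factor_configs=source_factor_configs)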
awslabs / sockeye / sockeye / train.py (View on GitHub)
    config_decoder = create_decoder_config(args, encoder_num_hidden, max_seq_len_source, max_seq_len_target,
                                           num_embed_target)

    source_factor_configs = None
    if len(source_vocab_sizes) > 1:
        source_factors_num_embed = args.source_factors_num_embed
        if args.source_factors_combine == C.SOURCE_FACTORS_COMBINE_SUM:
            # If factors are being added instead of concatenated, set all dimensions to the embedding dimensions
            logger.info("Setting all source factor embedding sizes to `num_embed` ('%d') for summing",
                        num_embed_source)
            source_factors_num_embed = [num_embed_source] * len(source_factor_vocab_sizes)

        source_factor_configs = [encoder.FactorConfig(size, dim) for size, dim in zip(source_factor_vocab_sizes,
                                                                                      source_factors_num_embed)]

    config_embed_source = encoder.EmbeddingConfig(vocab_size=source_vocab_size,
                                                  num_embed=num_embed_source,
                                                  dropout=embed_dropout_source,
                                                  factor_configs=source_factor_configs,
                                                  source_factors_combine=args.source_factors_combine)

    config_embed_target = encoder.EmbeddingConfig(vocab_size=target_vocab_size,
                                                  num_embed=num_embed_target,
                                                  dropout=embed_dropout_target)

    config_loss = loss.LossConfig(name=args.loss,
                                  vocab_size=target_vocab_size,
                                  normalization_type=args.loss_normalization_type,
                                  label_smoothing=args.label_smoothing)

    if args.length_task is not None:
        config_length_task = layers.LengthRatioConfig(num_layers=args.length_task_layers, weight=args.length_task_weight)
        link = C.LINK_NORMAL if args.length_task == C.LENGTH_TASK_RATIO else C.LINK_POISSON
        config_length_task_loss = loss.LossConfig(name=C.LENRATIO_REGRESSION,
                                                  length_task_link=link,
                                                  length_task_weight=args.length_task_weight)
    else:
        config_length_task = None
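
Note the extra source_factors_combine argument in this newer awslabs version: factor embeddings can either be concatenated to the word embedding or summed with it, and summing requires every factor embedding to have the same size as num_embed, which is why the code above overwrites source_factors_num_embed. A sketch of the summing variant, again with illustrative sizes:

from sockeye import constants as C
from sockeye import encoder

num_embed_source = 512
source_factor_vocab_sizes = [40, 12]  # e.g. POS tags and casing

# Summed factor embeddings must match the word embedding size exactly.
source_factors_num_embed = [num_embed_source] * len(source_factor_vocab_sizes)

source_factor_configs = [encoder.FactorConfig(size, dim)
                         for size, dim in zip(source_factor_vocab_sizes,
                                              source_factors_num_embed)]

config_embed_source = encoder.EmbeddingConfig(vocab_size=30000,
                                              num_embed=num_embed_source,
                                              dropout=0.1,
                                              factor_configs=source_factor_configs,
                                              source_factors_combine=C.SOURCE_FACTORS_COMBINE_SUM)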
awslabs / sockeye / sockeye / encoder.py (View on GitHub)
            else:
                embedding = mx.sym.add_n(embedding, *factor_embeddings, name=self.prefix + "embed_plus_factors")

        if self.config.dropout > 0:
            embedding = mx.sym.Dropout(data=embedding, p=self.config.dropout, name="source_embed_dropout")

        return embedding, data_length, seq_len

    def get_num_hidden(self) -> int:
        """
        Return the representation size of this encoder.
        """
        return self.config.num_embed


class PassThroughEmbeddingConfig(EmbeddingConfig):

    def __init__(self) -> None:
        super().__init__(vocab_size=0, num_embed=0, dropout=0.0, factor_configs=None)


class PassThroughEmbedding(Encoder):
    """
    This is an embedding which passes through an input symbol without doing any operation.

    :param config: PassThroughEmbeddingConfig config.
    """

    def __init__(self,
                 config: PassThroughEmbeddingConfig) -> None:
        super().__init__('float32')
        self.config = config
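
As the encoder.py source above shows, PassThroughEmbeddingConfig is an EmbeddingConfig with everything zeroed out: its __init__ hard-codes vocab_size=0, num_embed=0, dropout=0.0, and no factor configs, and the matching PassThroughEmbedding forwards its input unchanged. Constructing one therefore takes no arguments:

from sockeye import encoder

# The pass-through variant fixes vocab_size=0, num_embed=0, and dropout=0.0
# in its __init__, so no parameters are needed (or accepted).
config = encoder.PassThroughEmbeddingConfig()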