                                                       dropout=args.conv_embed_dropout)

    # config_encoder, encoder_num_hidden = create_encoder_config(args, max_seq_len_source, max_seq_len_target,
    #                                                            config_conv)
    # GRN: use the graph encoder instead of the standard sequence encoder
    config_encoder, encoder_num_hidden = create_graph_encoder_config(args, edge_vocab_size, max_seq_len_source)
    config_decoder = create_decoder_config(args, encoder_num_hidden, max_seq_len_source, max_seq_len_target)

    # Optional source factors: one embedding per additional source stream
    source_factor_configs = None
    if len(source_vocab_sizes) > 1:
        source_factor_configs = [encoder.FactorConfig(size, dim) for size, dim in zip(source_factor_vocab_sizes,
                                                                                      args.source_factors_num_embed)]

    config_embed_source = encoder.EmbeddingConfig(vocab_size=source_vocab_size,
                                                  num_embed=num_embed_source,
                                                  dropout=embed_dropout_source,
                                                  factor_configs=source_factor_configs)
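    # Illustration with hypothetical numbers (not part of the original code): with two source
    # factors, e.g. source_factor_vocab_sizes = [4, 52] and args.source_factors_num_embed = [8, 8],
    # the comprehension above yields [FactorConfig(4, 8), FactorConfig(52, 8)], so each factor
    # gets its own embedding alongside the word embedding.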
    config_embed_target = encoder.EmbeddingConfig(vocab_size=target_vocab_size,
                                                  num_embed=num_embed_target,
                                                  dropout=embed_dropout_target)

    config_loss = loss.LossConfig(name=args.loss,
                                  vocab_size=target_vocab_size,
                                  normalization_type=args.loss_normalization_type,
                                  label_smoothing=args.label_smoothing)
    model_config = model.ModelConfig(config_data=config_data,
                                     vocab_source_size=source_vocab_size,
                                     vocab_target_size=target_vocab_size,
                                     config_embed_source=config_embed_source,
                                     config_embed_target=config_embed_target,
                                     config_encoder=config_encoder,
                                     config_decoder=config_decoder,
                                     config_loss=config_loss)  # remaining keyword arguments not shown in this snippet
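
# A minimal sketch (assumption, not Sockeye's implementation) of what the `label_smoothing`
# value passed to the loss config above means: one common formulation gives the true token
# probability 1 - epsilon and spreads the remaining epsilon uniformly over the rest of the
# vocabulary before computing cross-entropy.
def smoothed_target(true_id, vocab_size, epsilon):
    """Return a smoothed one-hot target distribution over the vocabulary."""
    off_value = epsilon / (vocab_size - 1)
    dist = [off_value] * vocab_size
    dist[true_id] = 1.0 - epsilon
    return dist

# smoothed_target(2, 5, 0.1) -> [0.025, 0.025, 0.9, 0.025, 0.025]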

# --- A second snippet of the same configuration code, from a Sockeye version that adds
# --- source factor combining (concat vs. sum) and an optional length-ratio prediction task.
    config_decoder = create_decoder_config(args, encoder_num_hidden, max_seq_len_source, max_seq_len_target,
                                           num_embed_target)

    source_factor_configs = None
    if len(source_vocab_sizes) > 1:
        source_factors_num_embed = args.source_factors_num_embed
        if args.source_factors_combine == C.SOURCE_FACTORS_COMBINE_SUM:
            # If factors are being added instead of concatenated, set all dimensions to the embedding dimensions
            logger.info("Setting all source factor embedding sizes to `num_embed` ('%d') for summing",
                        num_embed_source)
            source_factors_num_embed = [num_embed_source] * len(source_factor_vocab_sizes)
        source_factor_configs = [encoder.FactorConfig(size, dim) for size, dim in zip(source_factor_vocab_sizes,
                                                                                      source_factors_num_embed)]

    config_embed_source = encoder.EmbeddingConfig(vocab_size=source_vocab_size,
                                                  num_embed=num_embed_source,
                                                  dropout=embed_dropout_source,
                                                  factor_configs=source_factor_configs,
                                                  source_factors_combine=args.source_factors_combine)
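    # Illustration with hypothetical numbers (not part of the original code): with num_embed_source = 512
    # and factor embedding sizes [8, 8], concatenation feeds the encoder a 512 + 8 + 8 = 528-wide source
    # embedding, while summing keeps the width at 512 and forces every factor embedding to size 512
    # (which is why the sizes are overwritten above).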

    config_embed_target = encoder.EmbeddingConfig(vocab_size=target_vocab_size,
                                                  num_embed=num_embed_target,
                                                  dropout=embed_dropout_target)

    config_loss = loss.LossConfig(name=args.loss,
                                  vocab_size=target_vocab_size,
                                  normalization_type=args.loss_normalization_type,
                                  label_smoothing=args.label_smoothing)

    if args.length_task is not None:
        config_length_task = layers.LengthRatioConfig(num_layers=args.length_task_layers,
                                                      weight=args.length_task_weight)
        link = C.LINK_NORMAL if args.length_task == C.LENGTH_TASK_RATIO else C.LINK_POISSON
        config_length_task_loss = loss.LossConfig(name=C.LENRATIO_REGRESSION,
                                                  length_task_link=link,
                                                  length_task_weight=args.length_task_weight)
    else:
        config_length_task = None
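
# Rough illustration (assumption, not Sockeye code) of the two link functions chosen above:
# a normal link treats the predicted length ratio as a real-valued regression target, while a
# Poisson link treats the target length as a count whose rate is the predicted ratio times the
# source length.
import math

def poisson_log_likelihood(target_len: int, rate: float) -> float:
    """Log-likelihood of an observed target length under a Poisson with the given rate."""
    return target_len * math.log(rate) - rate - math.lgamma(target_len + 1)

# e.g. with source length 10 and a predicted ratio of 1.2 the rate is 12.0, so an observed
# target length of 12 is more likely than one of 20:
assert poisson_log_likelihood(12, 12.0) > poisson_log_likelihood(20, 12.0)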

# --- A separate snippet, apparently from the encoder module: the tail of Embedding.encode()
# --- plus the pass-through embedding classes.
            else:
                embedding = mx.sym.add_n(embedding, *factor_embeddings, name=self.prefix + "embed_plus_factors")

        if self.config.dropout > 0:
            embedding = mx.sym.Dropout(data=embedding, p=self.config.dropout, name="source_embed_dropout")

        return embedding, data_length, seq_len

    def get_num_hidden(self) -> int:
        """
        Return the representation size of this encoder.
        """
        return self.config.num_embed


class PassThroughEmbeddingConfig(EmbeddingConfig):

    def __init__(self) -> None:
        super().__init__(vocab_size=0, num_embed=0, dropout=0.0, factor_configs=None)


class PassThroughEmbedding(Encoder):
    """
    This is an embedding which passes through an input symbol without doing any operation.

    :param config: PassThroughEmbeddingConfig config.
    """

    def __init__(self,
                 config: PassThroughEmbeddingConfig) -> None:
        super().__init__('float32')
        self.config = config
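
# Minimal sketch (assumption, not the library's code) of the pass-through pattern the class above
# implements: an object that satisfies the encoder interface but returns its inputs untouched,
# useful when a component expects an encoder although the data needs no further embedding.
from typing import Optional, Tuple

class DemoPassThrough:
    def encode(self, data, data_length: Optional[int], seq_len: int) -> Tuple[object, Optional[int], int]:
        # No embedding lookup and no dropout: hand the inputs straight back.
        return data, data_length, seq_len

    def get_num_hidden(self) -> int:
        # No representation size of its own to report.
        return 0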