def __init__(self, dictionary, embed_dim=512, embed_dict=None,
             max_positions=1024, convolutions=((512, 3),) * 20, dropout=0.1):
    super().__init__(dictionary)
    self.dropout = dropout
    self.num_attention_layers = None

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        padding_idx,
        left_pad=LanguagePairDataset.LEFT_PAD_SOURCE,
    )

    in_channels = convolutions[0][0]
    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    for (out_channels, kernel_size) in convolutions:
        self.projections.append(Linear(in_channels, out_channels)
                                if in_channels != out_channels else None)
        # odd kernels get symmetric "same" padding; even kernels get none here
        if kernel_size % 2 == 1:
            padding = kernel_size // 2
        else:
            padding = 0
        self.convolutions.append(
            ConvTBC(in_channels, out_channels * 2, kernel_size,
                    dropout=dropout, padding=padding)
        )
        in_channels = out_channels
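
# A minimal sketch (plain PyTorch, not fairseq's ConvTBC) of why each convolution
# above outputs 2 * out_channels: a gated linear unit (GLU) downstream splits the
# channels in half and uses one half to gate the other. Sizes are toy values.
import torch
import torch.nn as nn
import torch.nn.functional as F

in_channels, out_channels, kernel_size = 512, 512, 3
padding = kernel_size // 2 if kernel_size % 2 == 1 else 0  # same padding rule as above

conv = nn.Conv1d(in_channels, out_channels * 2, kernel_size, padding=padding)
x = torch.randn(8, in_channels, 25)   # (batch, channels, time)
y = F.glu(conv(x), dim=1)             # gate over channels: 2 * out_channels -> out_channels
print(y.shape)                        # torch.Size([8, 512, 25])
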
def get_trainer_and_epoch_itr(epoch, epoch_size, num_updates, iterations_in_epoch):
    # Build a mock trainer plus an epoch iterator over a toy token dataset,
    # one sentence per batch.
    tokens = torch.LongTensor(list(range(epoch_size))).view(1, -1)
    tokens_ds = data.TokenBlockDataset(
        tokens, sizes=[tokens.size(-1)], block_size=1, pad=0, eos=1, include_targets=False,
    )
    trainer = mock_trainer(epoch, num_updates, iterations_in_epoch)
    dataset = data.LanguagePairDataset(tokens_ds, tokens_ds.sizes, mock_dict(), shuffle=False)
    epoch_itr = data.EpochBatchIterator(
        dataset=dataset,
        collate_fn=dataset.collater,
        batch_sampler=[[i] for i in range(epoch_size)],
    )
    return trainer, epoch_itr
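
# Hedged usage sketch for the helper above, relying only on the EpochBatchIterator
# API already used elsewhere in these snippets (next_epoch_itr); the numbers are arbitrary.
trainer, epoch_itr = get_trainer_and_epoch_itr(
    epoch=2, epoch_size=100, num_updates=50, iterations_in_epoch=10)
itr = epoch_itr.next_epoch_itr(shuffle=False)
for sample in itr:
    pass  # a real test would feed `sample` to the (mock) trainer here
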
def language_pair_dataset(lang_pair):
    src, tgt = lang_pair.split('-')
    src_dataset, tgt_dataset = src_datasets[lang_pair], tgt_datasets[lang_pair]
    return self.alter_dataset_langtok(
        LanguagePairDataset(
            src_dataset, src_dataset.sizes, self.dicts[src],
            tgt_dataset, tgt_dataset.sizes, self.dicts[tgt],
            left_pad_source=self.args.left_pad_source,
            left_pad_target=self.args.left_pad_target,
            max_source_positions=self.args.max_source_positions,
            max_target_positions=self.args.max_target_positions,
        ),
        self.dicts[src].eos(),
        src,
        self.dicts[tgt].eos(),
        tgt,
    )
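
# Hedged usage sketch: a closure like language_pair_dataset is normally mapped over
# the configured language pairs; the pair names here are purely illustrative.
from collections import OrderedDict

lang_pairs = ["en-de", "de-en"]  # hypothetical pairs
datasets = OrderedDict((pair, language_pair_dataset(pair)) for pair in lang_pairs)
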
assert pos in ("learned", "timing", "nopos")
self.dropout = dropout
self.attention_dropout = attention_dropout
self.relu_dropout = relu_dropout
self.pos = pos

num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
if self.pos == "learned":
    self.embed_positions = PositionalEmbedding(max_positions, embed_dim, padding_idx,
                                               left_pad=LanguagePairDataset.LEFT_PAD_SOURCE)
elif self.pos == "timing":
    self.embed_positions = SinusoidalPositionalEmbedding(embed_dim, padding_idx,
                                                         left_pad=LanguagePairDataset.LEFT_PAD_SOURCE)

self.layers = num_layers
self.attnpath = AttnPathEncoder(self.layers, num_heads=num_heads,
                                filter_size=filter_size, hidden_size=hidden_size,
                                dropout=dropout, attention_dropout=attention_dropout,
                                relu_dropout=relu_dropout)
self.cnnpath = CNNPathEncoder(self.layers, hidden_size=hidden_size, dropout=dropout,
                              in_embed=hidden_size, out_embed=hidden_size)
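
# Self-contained sketch (not this project's SinusoidalPositionalEmbedding class) of
# the "timing" option above: fixed sinusoidal position codes in the style of
# "Attention Is All You Need". Exact interleaving/scaling may differ from the real module.
import math
import torch

def sinusoidal_positions(num_positions, dim):
    pos = torch.arange(num_positions, dtype=torch.float).unsqueeze(1)          # (P, 1)
    inv_freq = torch.exp(torch.arange(0, dim, 2, dtype=torch.float)
                         * -(math.log(10000.0) / dim))                         # (dim/2,)
    angles = pos * inv_freq                                                    # (P, dim/2)
    return torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)            # (P, dim)

print(sinusoidal_positions(1024, 512).shape)  # torch.Size([1024, 512])
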
def merge(key, left_pad, move_eos_to_beginning=False):
    # Collate a list of 1-D token tensors into one padded 2-D batch tensor.
    return LanguagePairDataset.collate_tokens(
        [s[key] for s in samples],
        pad_idx, eos_idx, left_pad, move_eos_to_beginning,
    )
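
# Simplified stand-in (not fairseq's actual collate_tokens) showing what the merge
# above produces: a (batch, max_len) tensor padded on the left or right, optionally
# rotating the trailing EOS to the front to build decoder inputs for teacher forcing.
import torch

def collate_tokens_sketch(values, pad_idx, eos_idx, left_pad, move_eos_to_beginning=False):
    size = max(v.numel() for v in values)
    res = values[0].new(len(values), size).fill_(pad_idx)
    for i, v in enumerate(values):
        if move_eos_to_beginning:
            assert v[-1] == eos_idx
            v = torch.cat([v[-1:], v[:-1]])
        if left_pad:
            res[i, size - v.numel():] = v
        else:
            res[i, :v.numel()] = v
    return res

toy = [torch.LongTensor([4, 5, 2]), torch.LongTensor([6, 2])]  # 2 = eos, 1 = pad (toy ids)
print(collate_tokens_sketch(toy, pad_idx=1, eos_idx=2, left_pad=True))
# tensor([[4, 5, 2],
#         [1, 6, 2]])
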
if self.char_source_dict is not None:
    char_data_class = (
        char_data.LanguagePairCharDataset
        if self.char_target_dict is not None
        else char_data.LanguagePairSourceCharDataset
    )
    datasets[key] = char_data_class(
        src=src_dataset,
        src_sizes=src_sizes,
        src_dict=self.source_dictionary,
        tgt=tgt_dataset,
        tgt_sizes=tgt_dataset.sizes,
        tgt_dict=self.target_dictionary,
    )
else:
    datasets[key] = LanguagePairDataset(
        src=src_dataset,
        src_sizes=src_sizes,
        src_dict=self.source_dictionary,
        tgt=tgt_dataset,
        tgt_sizes=tgt_dataset.sizes,
        tgt_dict=self.target_dictionary,
        left_pad_source=False,
    )

total_line_count = sum(len(datasets[key]) for key in datasets)
if dataset_relative_ratio:
    ds, ratio = dataset_relative_ratio
    line_count = len(datasets[ds])
    # By definition ratio = u * line_count / sum(#lines of other datasets)
    u = (total_line_count - line_count) / line_count * ratio
    dataset_upsampling = {ds: u}
elif not dataset_upsampling:
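
# Worked example of the relative-ratio arithmetic above, with toy line counts:
# asking for dataset "a" to be seen at ratio 0.5 relative to the other datasets.
line_counts = {"a": 1000, "b": 4000, "c": 5000}            # hypothetical corpora
ds, ratio = "a", 0.5
total_line_count = sum(line_counts.values())               # 10000
line_count = line_counts[ds]                               # 1000
u = (total_line_count - line_count) / line_count * ratio   # 9000 / 1000 * 0.5 = 4.5
dataset_upsampling = {ds: u}                               # {"a": 4.5}
# Check against the definition: u * line_count / (total - line_count) = 4.5 * 1000 / 9000 = 0.5
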
def make_batches(lines, args, src_dict, max_positions, bpe=None):
    # Tokenize raw input lines, batch them under the max_tokens / max_sentences
    # limits, and yield (Batch, original ids) pairs.
    tokens = [
        tokenizer.Tokenizer.tokenize(src_str, src_dict, tokenize=tokenizer.tokenize_en, add_if_not_exist=False, bpe=bpe).long()
        for src_str in lines
    ]
    lengths = np.array([t.numel() for t in tokens])
    itr = data.EpochBatchIterator(
        dataset=data.LanguagePairDataset(tokens, lengths, src_dict),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=max_positions,
    ).next_epoch_itr(shuffle=False)
    for batch in itr:
        yield Batch(
            srcs=[lines[i] for i in batch['id']],
            tokens=batch['net_input']['src_tokens'],
            lengths=batch['net_input']['src_lengths'],
        ), batch['id']
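
# Self-contained sketch (not fairseq's batching code) of the constraint that
# max_tokens / max_sentences impose above: sentences are grouped so that the padded
# batch size (num_sentences * longest_length) stays within a token budget.
def greedy_batches(lengths, max_tokens=16, max_sentences=4):
    batches, cur = [], []
    for i, n in sorted(enumerate(lengths), key=lambda x: x[1]):
        cand = cur + [i]
        too_big = (len(cand) > max_sentences
                   or len(cand) * max(lengths[j] for j in cand) > max_tokens)
        if cur and too_big:
            batches.append(cur)
            cur = [i]
        else:
            cur = cand
    if cur:
        batches.append(cur)
    return batches

print(greedy_batches([3, 7, 2, 5, 9, 4]))  # [[2, 0, 5], [3, 1], [4]]
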
def make_batches(lines, args, src_dict, max_positions, tgt_str=None, tgt_dict=None):
    # Same batching helper as above, but optionally tokenizes a single target
    # string and exposes its first collated tokens as Batch.prefix
    # (e.g. for prefix-constrained decoding).
    tokens = [
        tokenizer.Tokenizer.tokenize(src_str, src_dict, add_if_not_exist=False).long()
        for src_str in lines
    ]
    if tgt_str is not None:
        tgt_tokens = [
            tokenizer.Tokenizer.tokenize(tgt_str, tgt_dict, add_if_not_exist=False).long()
        ]
    else:
        tgt_tokens = None
    lengths = np.array([t.numel() for t in tokens])
    itr = data.EpochBatchIterator(
        dataset=data.LanguagePairDataset(tokens, lengths, src_dict, tgt=tgt_tokens, tgt_sizes=None, tgt_dict=tgt_dict),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=max_positions,
    ).next_epoch_itr(shuffle=False)
    for batch in itr:
        if tgt_str is not None:
            yield Batch(
                srcs=[lines[i] for i in batch['id']],
                tokens=batch['net_input']['src_tokens'],
                lengths=batch['net_input']['src_lengths'],
                prefix=batch['target'][:, :3],
            ), batch['id']
        else:
            yield Batch(
                srcs=[lines[i] for i in batch['id']],
                tokens=batch['net_input']['src_tokens'],
                lengths=batch['net_input']['src_lengths'],
            ), batch['id']
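
# Toy illustration of the prefix slice used above: in the collated (batch, time)
# target matrix, [:, :3] keeps the first three target tokens of every sentence,
# which the caller can use e.g. as a forced decoding prefix. Ids are made up.
import torch

target = torch.LongTensor([[7, 8, 9, 10, 2],
                           [4, 5, 6, 2, 1]])   # 2 = eos, 1 = pad
print(target[:, :3])
# tensor([[7, 8, 9],
#         [4, 5, 6]])
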
assert pos in ("learned", "timing", "nopos")
self.dropout = dropout
self.attention_dropout = attention_dropout
self.relu_dropout = relu_dropout
self.pos = pos

num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
if self.pos == "learned":
    self.embed_positions = PositionalEmbedding(max_positions, embed_dim, padding_idx,
                                               left_pad=LanguagePairDataset.LEFT_PAD_TARGET)
elif self.pos == "timing":
    self.embed_positions = SinusoidalPositionalEmbedding(embed_dim, padding_idx,
                                                         left_pad=LanguagePairDataset.LEFT_PAD_TARGET)

self.layers = num_layers
# Per-layer decoder blocks: self-attention, encoder-decoder attention,
# a feed-forward network, and their associated layer norms.
self.self_attention_blocks = nn.ModuleList()
self.encdec_attention_blocks = nn.ModuleList()
self.ffn_blocks = nn.ModuleList()
self.norm1_blocks = nn.ModuleList()
self.norm2_blocks = nn.ModuleList()
self.norm3_blocks = nn.ModuleList()
for i in range(num_layers):
    self.self_attention_blocks.append(MultiheadAttentionDecoder(hidden_size,
                                                                hidden_size,
                                                                hidden_size,
                                                                num_heads))
    self.ffn_blocks.append(FeedForwardNetwork(hidden_size, filter_size, relu_dropout))
    self.norm1_blocks.append(LayerNormalization(hidden_size))

dropout=0.1, attention_dropout=0.1, relu_dropout=0.1, share_embed=False):
    super().__init__(dictionary)
    self.register_buffer('version', torch.Tensor([2]))
    assert pos in ("learned", "timing", "nopos")
    self.dropout = dropout
    self.attention_dropout = attention_dropout
    self.relu_dropout = relu_dropout
    self.pos = pos

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    if self.pos == "learned":
        self.embed_positions = PositionalEmbedding(max_positions, embed_dim, padding_idx,
                                                   left_pad=LanguagePairDataset.LEFT_PAD_TARGET)
    elif self.pos == "timing":
        self.embed_positions = SinusoidalPositionalEmbedding(embed_dim, padding_idx,
                                                             left_pad=LanguagePairDataset.LEFT_PAD_TARGET)

    self.layers = num_layers
    self.self_attention_blocks = nn.ModuleList()
    self.encdec_attention_blocks = nn.ModuleList()
    self.ffn_blocks = nn.ModuleList()
    self.norm1_blocks = nn.ModuleList()
    self.norm2_blocks = nn.ModuleList()
    self.norm3_blocks = nn.ModuleList()
    for i in range(num_layers):
        self.self_attention_blocks.append(MultiheadAttentionDecoder(hidden_size,
                                                                    hidden_size,
                                                                    hidden_size,
                                                                    num_heads))
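
# Plain-PyTorch analogue (not this project's MultiheadAttentionDecoder /
# FeedForwardNetwork / LayerNormalization classes) of the per-layer structure the
# two decoder snippets above assemble: self-attention, encoder-decoder attention
# and a feed-forward network, each paired with its own layer norm. The residual /
# norm placement shown is one common (pre-norm) choice, not necessarily the project's.
import torch
import torch.nn as nn

hidden_size, num_heads, filter_size = 256, 8, 1024
self_attn = nn.MultiheadAttention(hidden_size, num_heads)
encdec_attn = nn.MultiheadAttention(hidden_size, num_heads)
ffn = nn.Sequential(nn.Linear(hidden_size, filter_size), nn.ReLU(),
                    nn.Linear(filter_size, hidden_size))
norm1, norm2, norm3 = (nn.LayerNorm(hidden_size) for _ in range(3))

x = torch.randn(5, 2, hidden_size)           # (tgt_len, batch, hidden)
enc_out = torch.randn(7, 2, hidden_size)     # (src_len, batch, hidden)

h = norm1(x)
x = x + self_attn(h, h, h)[0]                # residual + self-attention (future mask omitted)
h = norm2(x)
x = x + encdec_attn(h, enc_out, enc_out)[0]  # residual + attention over encoder output
h = norm3(x)
x = x + ffn(h)                               # residual + position-wise feed-forward
print(x.shape)                               # torch.Size([5, 2, 256])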