# Prepare the initial network input from the seed value.
if is_mulaw_quantize(hparams.input_type):
    initial_input = np_utils.to_categorical(
        initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
    initial_input = torch.from_numpy(initial_input).view(
        1, 1, hparams.quantize_channels)
else:
    initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
initial_input = initial_input.to(device)
# Run the model in fast eval mode
with torch.no_grad():
    y_hat = model.incremental_forward(
        initial_input, c=c, g=g, T=length, softmax=True, quantize=True, tqdm=tqdm,
        log_scale_min=hparams.log_scale_min)
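# Note: incremental_forward runs the fast autoregressive sampling loop: it
# generates one sample per step and feeds it back as the next input, with
# softmax=True / quantize=True making the model emit discrete sample ids and
# T=length bounding the number of generated samples.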
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().data.numpy()
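# For reference, P.inv_mulaw / P.inv_mulaw_quantize above undo mu-law
# companding (P is assumed here to be nnmnkwii.preprocessing). A minimal
# NumPy sketch of the standard expansion formulas, assuming
# mu = quantize_channels - 1:
def _inv_mulaw_sketch(y, mu=255):
    # y in [-1, 1] -> linear amplitude in [-1, 1]
    return np.sign(y) * (1.0 / mu) * ((1.0 + mu) ** np.abs(y) - 1.0)

def _inv_mulaw_quantize_sketch(y, mu=255):
    # integer class ids in {0, ..., mu} -> linear amplitude
    return _inv_mulaw_sketch(2.0 * y.astype(np.float32) / mu - 1.0, mu)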
# Save audio
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y_target, sr=hparams.sample_rate)
# save figure
path = join(eval_dir, "step{:09d}_waveplots.png".format(global_step))
save_waveplot(path, y_hat, y_target)
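# save_waveplot is a project-local helper; a plausible matplotlib-based
# sketch (function body assumed, not taken from this codebase):
def _save_waveplot_sketch(path, y_hat, y_target):
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(16, 6))
    plt.subplot(2, 1, 1)
    plt.plot(y_target)
    plt.title("Target waveform")
    plt.subplot(2, 1, 2)
    plt.plot(y_hat)
    plt.title("Predicted waveform")
    plt.tight_layout()
    plt.savefig(path)
    plt.close(fig)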
# Initial input from the seed value; the one-hot case has shape (C,).
# (Variable is a legacy PyTorch wrapper, a no-op since 0.4; kept as written.)
if is_mulaw_quantize(hparams.input_type):
    initial_input = np_utils.to_categorical(
        initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
    initial_input = Variable(torch.from_numpy(initial_input)).view(
        1, 1, hparams.quantize_channels)
else:
    initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
initial_input = initial_input.cuda() if use_cuda else initial_input
y_teacher = teacher.incremental_forward(
    initial_input, c=c, g=g, T=length, tqdm=tqdm, softmax=True, quantize=True,
    log_scale_min=hparams.log_scale_min)
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_teacher.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_teacher.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_teacher.view(-1).cpu().data.numpy()
# y_student: draw noise z from a standard logistic distribution and run it
# through the student network to get per-sample output mu and scale.
z = np.random.logistic(0, 1, y_target.shape)
mu, scale = student(z, c, g=g)
m, s = to_numpy(mu), to_numpy(scale)
student_predict = np.random.logistic(m, s)
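# Note: because Logistic(m, s) = m + s * Logistic(0, 1), the draw above is
# equivalent to shifting/scaling standard logistic noise:
#   student_predict = m + s * np.random.logistic(0, 1, size=m.shape)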
# Save audio
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_teacher_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
if is_mulaw_quantize(hparams.input_type):
    initial_input = np_utils.to_categorical(
        initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
    initial_input = Variable(torch.from_numpy(initial_input)).view(
        1, 1, hparams.quantize_channels)
else:
    initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
initial_input = initial_input.cuda() if use_cuda else initial_input
y_hat, c_hat = model.incremental_forward(
    initial_input, c=c, g=g, T=length, tqdm=tqdm, softmax=True, quantize=True,
    log_scale_min=hparams.log_scale_min)
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().data.numpy()
# Save audio and partial spectrogram
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y_target, sr=hparams.sample_rate)
# path = join(eval_dir, "step{:09d}_modal_output.csv".format(global_step))
# Pick a random utterance from the batch and read off its true length.
# (.item() works for both 0-d and 1-element length tensors.)
idx = np.random.randint(0, len(y_hat))
length = input_lengths[idx].data.cpu().item()
# (B, C, T)
if y_hat.dim() == 4:
    y_hat = y_hat.squeeze(-1)

if is_mulaw_quantize(hparams.input_type):
    # (B, T)
    y_hat = F.softmax(y_hat, dim=1).max(1)[1]
    # (T,)
    y_hat = y_hat[idx].data.cpu().long().numpy()
    y = y[idx].view(-1).data.cpu().long().numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y = P.inv_mulaw_quantize(y, hparams.quantize_channels)
else:
    # (B, T)
    y_hat = sample_from_discretized_mix_logistic(
        y_hat, log_scale_min=hparams.log_scale_min)
    # (T,)
    y_hat = y_hat[idx].view(-1).data.cpu().numpy()
    y = y[idx].view(-1).data.cpu().numpy()

    if is_mulaw(hparams.input_type):
        y_hat = P.inv_mulaw(y_hat, hparams.quantize_channels)
        y = P.inv_mulaw(y, hparams.quantize_channels)

# Mask by length
y_hat[length:] = 0
y[length:] = 0
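# sample_from_discretized_mix_logistic draws from a mixture of logistic
# distributions; a condensed PyTorch sketch of the usual sampling rule,
# assuming y packs [mixture logits, means, log-scales] along the channel axis
# (names prefixed with _ to mark them as illustrative):
def _sample_dml_sketch(y, log_scale_min=-7.0):
    # y: (B, 3 * nr_mix, T) -> samples of shape (B, T)
    nr_mix = y.size(1) // 3
    y = y.transpose(1, 2)  # (B, T, 3 * nr_mix)
    logit_probs = y[:, :, :nr_mix]
    # Pick a mixture component per timestep via the Gumbel-max trick.
    u = torch.empty_like(logit_probs).uniform_(1e-5, 1.0 - 1e-5)
    argmax = (logit_probs - torch.log(-torch.log(u))).argmax(dim=-1)
    one_hot = F.one_hot(argmax, nr_mix).float()
    means = (y[:, :, nr_mix:2 * nr_mix] * one_hot).sum(dim=-1)
    log_scales = torch.clamp(
        (y[:, :, 2 * nr_mix:3 * nr_mix] * one_hot).sum(dim=-1), min=log_scale_min)
    # Invert the logistic CDF: x = mean + scale * (log(u) - log(1 - u)).
    u = torch.empty_like(means).uniform_(1e-5, 1.0 - 1e-5)
    x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1.0 - u))
    return torch.clamp(x, min=-1.0, max=1.0)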
# Scale prediction distribution using temperature.
np.seterr(divide='ignore')
scaled_prediction = np.log(prediction) / args.temperature
scaled_prediction = (scaled_prediction -
                     np.logaddexp.reduce(scaled_prediction))
scaled_prediction = np.exp(scaled_prediction)
np.seterr(divide='warn')
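# The log-domain arithmetic above is a numerically stable softmax with
# temperature: scaled_p[i] = p[i]**(1/T) / sum_j p[j]**(1/T). A direct (less
# stable) equivalent, for illustration only:
def _softmax_temperature_sketch(p, t):
    q = p ** (1.0 / t)  # t < 1 sharpens the distribution, t > 1 flattens it
    return q / q.sum()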
# print(quantization_channels, scaled_prediction)
sample = np.random.choice(
    np.arange(quantization_channels), p=scaled_prediction)
waveform.append(sample)
# If we have partial writing, save the result so far.
if (wav_out_path and args.save_every and
        (step + 1) % args.save_every == 0):
    out = P.inv_mulaw_quantize(np.array(waveform), quantization_channels)
    write_wav(out, hparams.sample_rate, wav_out_path)
# Introduce a newline to clear the carriage return from the progress.
print()
# Save the result as a wav file.
if wav_out_path:
    out = P.inv_mulaw_quantize(np.array(waveform).astype(np.int16), quantization_channels)
    # out = P.inv_mulaw_quantize(np.asarray(waveform), quantization_channels)
    write_wav(out, hparams.sample_rate, wav_out_path)
print('Finished generating.')
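# write_wav here is a local helper (note the argument order differs from
# librosa.output.write_wav, which was removed in librosa 0.8); a sketch
# consistent with the call sites above:
def _write_wav_sketch(waveform, sample_rate, filename):
    y = np.array(waveform)
    librosa.output.write_wav(filename, y, sample_rate)
    print('Updated wav file at {}'.format(filename))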
if is_mulaw_quantize(hparams.input_type):
    initial_input = np_utils.to_categorical(
        initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
    initial_input = Variable(torch.from_numpy(initial_input)).view(
        1, 1, hparams.quantize_channels)
else:
    initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
initial_input = initial_input.cuda() if use_cuda else initial_input
# Run the model in fast eval mode
y_hat = model.incremental_forward(
    initial_input, c=c, g=g, T=length, softmax=True, quantize=True, tqdm=tqdm,
    log_scale_min=hparams.log_scale_min)
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().data.numpy()
# Save audio
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y_target, sr=hparams.sample_rate)
# save figure
path = join(eval_dir, "step{:09d}_waveplots.png".format(global_step))
save_waveplot(path, y_hat, y_target, writer, global_step)
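# The extra writer/global_step arguments suggest this variant also logs the
# figure to TensorBoard, e.g. (assuming a SummaryWriter-like object and a
# matplotlib figure named fig, both hypothetical here):
#   writer.add_figure("eval/waveplots", fig, global_step)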