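# Boundary checks for mu-law quantization with two levels (mu=2): negative inputs
# fall in bin 0, inputs in [0, 1) fall in bin 1, and exactly 1.0 falls in bin 2.
# P is assumed here to be a mu-law preprocessing module (e.g. nnmnkwii.preprocessing)
# providing mulaw, inv_mulaw, mulaw_quantize and inv_mulaw_quantize.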
assert P.mulaw_quantize(-1.0, 2) == 0
assert P.mulaw_quantize(-0.5, 2) == 0
assert P.mulaw_quantize(-0.001, 2) == 0
assert P.mulaw_quantize(0.0, 2) == 1
assert P.mulaw_quantize(0.0001, 2) == 1
assert P.mulaw_quantize(0.5, 2) == 1
assert P.mulaw_quantize(0.99999, 2) == 1
assert P.mulaw_quantize(1.0, 2) == 2
np.random.seed(1234)
# forward/backward correctness
for mu in [128, 256, 512]:
    for x in np.random.rand(100):
        y = P.mulaw(x, mu)
        assert 0 <= y <= 1
        x_hat = P.inv_mulaw(y, mu)
        assert np.allclose(x, x_hat)

# forward/backward correctness for quantize
for mu in [128, 256, 512]:
    for x, y in [(-1.0, 0), (0.0, mu // 2), (0.99999, mu - 1)]:
        y_hat = P.mulaw_quantize(x, mu)
        err = np.abs(x - P.inv_mulaw_quantize(y_hat, mu))
        print(y, y_hat, err)
        assert np.allclose(y, y_hat)
        # quantization error should be small
        assert err <= 0.1

# ndarray input
for mu in [128, 256, 512]:
    x = np.random.rand(10)
    y = P.mulaw(x, mu)
target_wav_path = join(dst_dir, "speaker{}_{}_{}{}_target.wav".format(
    g, idx, checkpoint_name, file_name_suffix))
# Generate
global_conditioning = hparams.gin_channels > 0
if not global_conditioning:
    g = None
waveform = wavegen(model, length, c=c, g=g, initial_value=initial_value,
                   fast=True, tqdm=_tqdm)
# save
librosa.output.write_wav(dst_wav_path, waveform, sr=hparams.sample_rate)
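# Undo mu-law companding/quantization on the target signal x before writing it out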
if is_mulaw_quantize(hparams.input_type):
    x = P.inv_mulaw_quantize(x, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    x = P.inv_mulaw(x, hparams.quantize_channels)
librosa.output.write_wav(target_wav_path, x, sr=hparams.sample_rate)
# log
if output_html:
    print("""
    <audio controls="controls">
    <source autoplay="" src="/{}/audio/{}/{}">
    Your browser does not support the audio element.
    </audio>
    """.format(hparams.name, dst_dir_name, basename(dst_wav_path)))
print("Finished! Check out {} for generated audio samples.".format(dst_dir))
sys.exit(0)
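# Incremental sampling: move inputs to the device and run the model autoregressively.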
# Transform data to GPU
initial_input = initial_input.to(device)
g = None if g is None else g.to(device)
c = None if c is None else c.to(device)
with torch.no_grad():
    y_hat = model.incremental_forward(
        initial_input, c=c, g=g, T=length, tqdm=tqdm, softmax=True, quantize=True,
        log_scale_min=hparams.log_scale_min)
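# Map the prediction back to a raw waveform according to the input type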
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().data.numpy(), hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().data.numpy()

return y_hat
if is_mulaw_quantize(hparams.input_type):
    # (T,)
    y_hat = y_hat[idx].data.cpu().long().numpy()
    y = y[idx].view(-1).data.cpu().long().numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y = P.inv_mulaw_quantize(y, hparams.quantize_channels)
else:
    # (B, T)
    y_hat = sample_from_discretized_mix_logistic(
        y_hat, log_scale_min=hparams.log_scale_min)
    # (T,)
    y_hat = y_hat[idx].view(-1).data.cpu().numpy()
    y = y[idx].view(-1).data.cpu().numpy()
    if is_mulaw(hparams.input_type):
        y_hat = P.inv_mulaw(y_hat, hparams.quantize_channels)
        y = P.inv_mulaw(y, hparams.quantize_channels)
# Mask by length
y_hat[length:] = 0
y[length:] = 0
# Save audio
audio_dir = join(checkpoint_dir, "audio")
os.makedirs(audio_dir, exist_ok=True)
path = join(audio_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(audio_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y, sr=hparams.sample_rate)
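# Build the initial input for incremental generation: a one-hot vector for
# mu-law quantized (categorical) models, otherwise a scalar initial value.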
if is_mulaw_quantize(hparams.input_type):
    initial_input = Variable(torch.from_numpy(initial_input)).view(
        1, 1, hparams.quantize_channels)
else:
    initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
initial_input = initial_input.cuda() if use_cuda else initial_input
y_hat, c_hat = model.incremental_forward(
    initial_input, c=c, g=g, T=length, tqdm=tqdm, softmax=True, quantize=True,
    log_scale_min=hparams.log_scale_min)
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().data.numpy()
# Save audio and partial spectrogram
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y_target, sr=hparams.sample_rate)
# path = join(eval_dir, "step{:09d}_modal_output.csv".format(global_step))
else:
    initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
initial_input = initial_input.cuda() if use_cuda else initial_input

# Run the model in fast eval mode
y_hat = model.incremental_forward(
    initial_input, c=c, g=g, T=length, softmax=True, quantize=True, tqdm=tqdm,
    log_scale_min=hparams.log_scale_min)

if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().data.numpy()
# Save audio
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y_target, sr=hparams.sample_rate)
# save figure
path = join(eval_dir, "step{:09d}_waveplots.png".format(global_step))
save_waveplot(path, y_hat, y_target, writer, global_step)
if is_mulaw_quantize(hparams.input_type):
    # One-hot encode the initial value; the call head was truncated in the snippet and
    # is assumed to be np_utils.to_categorical (keras.utils).
    initial_input = np_utils.to_categorical(
        initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
    initial_input = Variable(torch.from_numpy(initial_input)).view(
        1, 1, hparams.quantize_channels)
else:
    initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
initial_input = initial_input.cuda() if use_cuda else initial_input
y_teacher = teacher.incremental_forward(
    initial_input, c=c, g=g, T=length, tqdm=tqdm, softmax=True, quantize=True,
    log_scale_min=hparams.log_scale_min)
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_teacher.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_teacher.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_teacher.view(-1).cpu().data.numpy()
# y_student: draw logistic noise z, run the student network to get (mu, scale),
# then sample the student prediction from the resulting logistic distribution.
z = np.random.logistic(0, 1, y_target.shape)
mu, scale = student(z, c, g=g)
m, s = to_numpy(mu), to_numpy(scale)
student_predict = np.random.logistic(m, s)
# Save audio
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_teacher_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_student_predicted.wav".format(global_step))
librosa.output.write_wav(path, student_predict, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_target.wav".format(global_step))
initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
initial_input = initial_input.to(device)
# Run the model in fast eval mode
with torch.no_grad():
    y_hat = model.incremental_forward(
        initial_input, c=c, g=g, T=length, softmax=True, quantize=True, tqdm=tqdm,
        log_scale_min=hparams.log_scale_min)

if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().data.numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y_target = P.inv_mulaw_quantize(y_target, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().data.numpy(), hparams.quantize_channels)
    y_target = P.inv_mulaw(y_target, hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().data.numpy()
# Save audio
os.makedirs(eval_dir, exist_ok=True)
path = join(eval_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(eval_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y_target, sr=hparams.sample_rate)
# save figure
path = join(eval_dir, "step{:09d}_waveplots.png".format(global_step))
save_waveplot(path, y_hat, y_target)
if is_mulaw_quantize(hparams.input_type):
    # (T,)
    y_hat = y_hat[idx].data.cpu().long().numpy()
    y = y[idx].view(-1).data.cpu().long().numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
    y = P.inv_mulaw_quantize(y, hparams.quantize_channels)
else:
    # (B, T)
    y_hat = sample_from_discretized_mix_logistic(
        y_hat, log_scale_min=hparams.log_scale_min)
    # (T,)
    y_hat = y_hat[idx].view(-1).data.cpu().numpy()
    y = y[idx].view(-1).data.cpu().numpy()
    if is_mulaw(hparams.input_type):
        y_hat = P.inv_mulaw(y_hat, hparams.quantize_channels)
        y = P.inv_mulaw(y, hparams.quantize_channels)
# Mask by length
y_hat[length:] = 0
y[length:] = 0
# Save audio
audio_dir = join(checkpoint_dir, "audio")
os.makedirs(audio_dir, exist_ok=True)
path = join(audio_dir, "step{:09d}_predicted.wav".format(global_step))
librosa.output.write_wav(path, y_hat, sr=hparams.sample_rate)
path = join(audio_dir, "step{:09d}_target.wav".format(global_step))
librosa.output.write_wav(path, y, sr=hparams.sample_rate)