Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_mulaw():
    """Check mu-law companding and quantization helpers exposed by ``P``.

    Three groups of checks run in order:

    1. Hand-picked quantization corner cases for ``mu=2``.
    2. Scalar ``mulaw`` -> ``inv_mulaw`` round trips on seeded random samples,
       including a range check on the compressed value.
    3. ``mulaw_quantize`` -> ``inv_mulaw_quantize`` round trips at the lower
       bound, the midpoint and just below the upper bound.
    """
    # (input sample, expected quantized index) pairs for mu=2.
    corner_cases = (
        (-1.0, 0),
        (-0.5, 0),
        (-0.001, 0),
        (0.0, 1),
        (0.0001, 1),
        (0.5, 1),
        (0.99999, 1),
        (1.0, 2),
    )
    for sample, expected_index in corner_cases:
        assert P.mulaw_quantize(sample, 2) == expected_index

    # Seed once so the random samples below are reproducible.
    np.random.seed(1234)
    mu_values = (128, 256, 512)

    # Forward/backward correctness on scalars.
    for mu in mu_values:
        for sample in np.random.rand(100):
            compressed = P.mulaw(sample, mu)
            assert 0 <= compressed <= 1
            restored = P.inv_mulaw(compressed, mu)
            assert np.allclose(sample, restored)

    # Forward/backward correctness for the quantized variant.
    for mu in mu_values:
        anchors = ((-1.0, 0), (0.0, mu // 2), (0.99999, mu - 1))
        for x, y in anchors:
            y_hat = P.mulaw_quantize(x, mu)
            x_hat = P.inv_mulaw_quantize(y_hat, mu)
            err = np.abs(x - x_hat)
            print(y, y_hat, err)
            assert np.allclose(y, y_hat)
            # The reconstruction error must stay small.
            assert err <= 0.1
# ndarray input
for mu in [128, 256, 512]:
# NOTE(review): excerpt only - the enclosing definition (and any guard on `c`)
# is outside this view, and the original indentation has been lost.
# Select sample `idx` from the local conditioning features `c` and re-add a
# leading axis of size 1. With upsampled conditional features the last axis is
# cut to `length // audio.get_hop_size()`; otherwise it is cut to `length`.
if hparams.upsample_conditional_features:
c = c[idx, :, :length // audio.get_hop_size()].unsqueeze(0)
else:
c = c[idx, :, :length].unsqueeze(0)
# After unsqueeze(0) the tensor must have exactly three dimensions.
assert c.dim() == 3
print("Shape of local conditioning features: {}".format(c.size()))
# Global conditioning is optional; when given, keep only sample `idx`.
if g is not None:
# TODO: test
g = g[idx]
print("Shape of global conditioning features: {}".format(g.size()))
# Dummy silence
# `initial_value` is what a zero sample maps to under the configured input
# type: the mu-law quantized index of 0, the mu-law transform of 0.0, or 0.0.
if is_mulaw_quantize(hparams.input_type):
initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
initial_value = P.mulaw(0.0, hparams.quantize_channels)
else:
initial_value = 0.0
# NOTE(review): "Intial" below is a typo for "Initial"; left unchanged because
# it is runtime output.
print("Intial value:", initial_value)
# (C,)
# Quantized input: presumably a one-hot vector over `quantize_channels`
# classes (np_utils.to_categorical - confirm), cast to float32 and viewed as
# (1, 1, quantize_channels). Otherwise: a 1x1x1 tensor filled with the value.
if is_mulaw_quantize(hparams.input_type):
initial_input = np_utils.to_categorical(
initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
initial_input = torch.from_numpy(initial_input).view(
1, 1, hparams.quantize_channels)
else:
initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
# Move the initial input to the target device.
initial_input = initial_input.to(device)
# Run the model in fast eval mode
with torch.no_grad():
# NOTE(review): excerpt only - the enclosing definition (and any guard on `c`)
# is outside this view, and the original indentation has been lost.
# Select sample `idx` from the local conditioning features `c` and re-add a
# leading axis of size 1. With upsampled conditional features the last axis is
# cut to `length // audio.get_hop_size()`; otherwise it is cut to `length`.
if hparams.upsample_conditional_features:
c = c[idx, :, :length // audio.get_hop_size()].unsqueeze(0)
else:
c = c[idx, :, :length].unsqueeze(0)
# After unsqueeze(0) the tensor must have exactly three dimensions.
assert c.dim() == 3
print("Shape of local conditioning features: {}".format(c.size()))
# Global conditioning is optional; when given, keep only sample `idx`.
if g is not None:
# TODO: test
g = g[idx]
print("Shape of global conditioning features: {}".format(g.size()))
# Dummy silence
# `initial_value` is what a zero sample maps to under the configured input
# type: the mu-law quantized index of 0, the mu-law transform of 0.0, or 0.0.
if is_mulaw_quantize(hparams.input_type):
initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
initial_value = P.mulaw(0.0, hparams.quantize_channels)
else:
initial_value = 0.0
# NOTE(review): "Intial" below is a typo for "Initial"; left unchanged because
# it is runtime output.
print("Intial value:", initial_value)
# (C,)
# Quantized input: presumably a one-hot vector over `quantize_channels`
# classes (np_utils.to_categorical - confirm), cast to float32 and viewed as
# (1, 1, quantize_channels). Otherwise: a 1x1x1 tensor filled with the value.
if is_mulaw_quantize(hparams.input_type):
initial_input = np_utils.to_categorical(
initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
initial_input = torch.from_numpy(initial_input).view(
1, 1, hparams.quantize_channels)
else:
initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
# Move the initial input to the target device.
initial_input = initial_input.to(device)
# Run the model in fast eval mode
with torch.no_grad():
# NOTE(review): excerpt only - these statements appear to continue a mu-law
# test whose header is not adjacent here; the original indentation is lost.
# ndarray input
# Round trip mulaw -> inv_mulaw on a 10-element random array for each mu.
for mu in [128, 256, 512]:
x = np.random.rand(10)
y = P.mulaw(x, mu)
x_hat = P.inv_mulaw(y, mu)
assert np.allclose(x, x_hat)
# No mu is passed and the result is not asserted, so this line only checks
# that the quantize/dequantize calls accept this input.
P.inv_mulaw_quantize(P.mulaw_quantize(x))
# torch array input
# NOTE(review): `warn` is imported but not used in the visible lines.
from warnings import warn
import torch
# Seed torch's RNG so the random tensors below are reproducible.
torch.manual_seed(1234)
for mu in [128, 256, 512]:
x = torch.rand(10)
y = P.mulaw(x, mu)
x_hat = P.inv_mulaw(y, mu)
assert np.allclose(x, x_hat)
# As above: default mu, result not asserted.
P.inv_mulaw_quantize(P.mulaw_quantize(x))
# NOTE(review): excerpt only - the enclosing definition is outside this view
# and the original indentation has been lost.
# Scale `wav` so that its peak absolute value equals hparams.rescaling_max.
# NOTE(review): an all-zero `wav` makes the divisor 0 - confirm callers
# exclude that case.
wav = wav / np.abs(wav).max() * hparams.rescaling_max
# Mu-law quantize
# Choose the output encoding from hparams.input_type. Each branch also sets
# `constant_values` (the encoding of a zero sample) and `out_dtype`;
# `out_dtype` is not used in the visible lines.
if is_mulaw_quantize(hparams.input_type):
# [0, quantize_channels)
out = P.mulaw_quantize(wav, hparams.quantize_channels)
# Trim silences
# The indices are computed on the quantized signal and applied to both arrays.
start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
wav = wav[start:end]
out = out[start:end]
constant_values = P.mulaw_quantize(0, hparams.quantize_channels)
out_dtype = np.int16
elif is_mulaw(hparams.input_type):
# [-1, 1]
out = P.mulaw(wav, hparams.quantize_channels)
constant_values = P.mulaw(0.0, hparams.quantize_channels)
out_dtype = np.float32
else:
# [-1, 1]
out = wav
constant_values = 0.0
out_dtype = np.float32
# Compute a mel-scale spectrogram from the trimmed wav:
# (N, D)
# NOTE(review): in the visible lines `wav` is trimmed only in the
# mu-law-quantize branch; in the other branches it is used as rescaled.
mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T
# lws pads zeros internally before performing stft
# this is needed to adjust time resolution between audio and mel-spectrogram
# `l`, `r` are presumably left/right padding lengths - confirm against
# audio.lws_pad_lr.
l, r = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())
# zero pad for quantized signal
# NOTE(review): excerpt only - the enclosing definition is outside this view
# and the original indentation has been lost.
# Scale `wav` so that its peak absolute value equals hparams.rescaling_max.
# NOTE(review): an all-zero `wav` makes the divisor 0 - confirm callers
# exclude that case.
wav = wav / np.abs(wav).max() * hparams.rescaling_max
# Mu-law quantize
# Choose the output encoding from hparams.input_type. Each branch also sets
# `constant_values` (the encoding of a zero sample) and `out_dtype`;
# `out_dtype` is not used in the visible lines.
if is_mulaw_quantize(hparams.input_type):
# [0, quantize_channels)
out = P.mulaw_quantize(wav, hparams.quantize_channels)
# Trim silences
# The indices are computed on the quantized signal and applied to both arrays.
start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
wav = wav[start:end]
out = out[start:end]
constant_values = P.mulaw_quantize(0, hparams.quantize_channels)
out_dtype = np.int16
elif is_mulaw(hparams.input_type):
# [-1, 1]
out = P.mulaw(wav, hparams.quantize_channels)
constant_values = P.mulaw(0.0, hparams.quantize_channels)
out_dtype = np.float32
else:
# [-1, 1]
out = wav
constant_values = 0.0
out_dtype = np.float32
# Compute a mel-scale spectrogram from the trimmed wav:
# (N, D)
# NOTE(review): in the visible lines `wav` is trimmed only in the
# mu-law-quantize branch; in the other branches it is used as rescaled.
mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T
# lws pads zeros internally before performing stft
# this is needed to adjust time resolution between audio and mel-spectrogram
# `l`, `r` are presumably left/right padding lengths - confirm against
# audio.lws_pad_lr.
l, r = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())
# zero pad for quantized signal
# NOTE(review): excerpt only - the enclosing definition is outside this view
# and the original indentation has been lost.
# Flatten sample `idx` of `y`, move it to a CPU NumPy array and keep the
# first `length` entries.
y_target = y[idx].view(-1).data.cpu().numpy()[:length]
# Local conditioning is optional; when given, keep sample `idx`, cut the last
# axis to `length` and re-add a leading axis of size 1.
if c is not None:
c = c[idx, :, :length].unsqueeze(0)
assert c.dim() == 3
print("Shape of local conditioning features: {}".format(c.size()))
# Global conditioning is optional; when given, keep only sample `idx`.
if g is not None:
# TODO: test
g = g[idx]
print("Shape of global conditioning features: {}".format(g.size()))
# Dummy silence
# `initial_value` is what a zero sample maps to under the configured input
# type: the mu-law quantized index of 0, the mu-law transform of 0.0, or 0.0.
if is_mulaw_quantize(hparams.input_type):
initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
initial_value = P.mulaw(0.0, hparams.quantize_channels)
else:
initial_value = 0.0
# NOTE(review): "Intial" below is a typo for "Initial"; left unchanged because
# it is runtime output.
print("Intial value:", initial_value)
# (C,)
# Quantized input: presumably a one-hot vector over `quantize_channels`
# classes (np_utils.to_categorical - confirm), cast to float32, wrapped in a
# Variable and viewed as (1, 1, quantize_channels). Otherwise: a 1x1x1
# Variable filled with the value.
if is_mulaw_quantize(hparams.input_type):
initial_input = np_utils.to_categorical(
initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
initial_input = Variable(torch.from_numpy(initial_input)).view(
1, 1, hparams.quantize_channels)
else:
initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
# Move to the GPU only when `use_cuda` is set.
initial_input = initial_input.cuda() if use_cuda else initial_input
# Run the model in fast eval mode
y_hat = model.incremental_forward(
# NOTE(review): excerpt only - the enclosing definition is outside this view
# and the original indentation has been lost.
# Mu-law quantize
# Choose the output encoding from hparams.input_type. Each branch also sets
# `constant_values` (the encoding of a zero sample) and `out_dtype`;
# `out_dtype` is not used in the visible lines.
if is_mulaw_quantize(hparams.input_type):
# [0, quantize_channels)
out = P.mulaw_quantize(wav, hparams.quantize_channels)
# Trim silences
# The indices are computed on the quantized signal and applied to both arrays.
start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
wav = wav[start:end]
out = out[start:end]
constant_values = P.mulaw_quantize(0, hparams.quantize_channels)
out_dtype = np.int16
elif is_mulaw(hparams.input_type):
# [-1, 1]
out = P.mulaw(wav, hparams.quantize_channels)
constant_values = P.mulaw(0.0, hparams.quantize_channels)
out_dtype = np.float32
else:
# [-1, 1]
out = wav
constant_values = 0.0
out_dtype = np.float32
# Compute a mel-scale spectrogram from the trimmed wav:
# (N, D)
# NOTE(review): in the visible lines `wav` is trimmed only in the
# mu-law-quantize branch; in the other branches it is used as given.
mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T
# lws pads zeros internally before performing stft
# this is needed to adjust time resolution between audio and mel-spectrogram
l, r = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())
# zero pad for quantized signal
# Pads `l` entries before and `r` entries after `out`. The fill value is
# `constant_values` (the encoding of a zero sample), not a literal 0.
out = np.pad(out, (l, r), mode="constant", constant_values=constant_values)
# NOTE(review): excerpt only - the enclosing definition is outside this view
# and the original indentation has been lost.
# Flatten sample `idx` of `y`, move it to a CPU NumPy array and keep the
# first `length` entries.
y_target = y[idx].view(-1).data.cpu().numpy()[:length]
# Local conditioning is optional; when given, keep sample `idx`, cut the last
# axis to `length` and re-add a leading axis of size 1.
if c is not None:
c = c[idx, :, :length].unsqueeze(0)
assert c.dim() == 3
print("Shape of local conditioning features: {}".format(c.size()))
# Global conditioning is optional; when given, keep only sample `idx`.
if g is not None:
# TODO: test
g = g[idx]
print("Shape of global conditioning features: {}".format(g.size()))
# Dummy silence
# `initial_value` is what a zero sample maps to under the configured input
# type: the mu-law quantized index of 0, the mu-law transform of 0.0, or 0.0.
if is_mulaw_quantize(hparams.input_type):
initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
initial_value = P.mulaw(0.0, hparams.quantize_channels)
else:
initial_value = 0.0
print("Initial value:", initial_value)
# (C,)
# Quantized input: presumably a one-hot vector over `quantize_channels`
# classes (np_utils.to_categorical - confirm), cast to float32, wrapped in a
# Variable and viewed as (1, 1, quantize_channels). Otherwise: a 1x1x1
# Variable filled with the value.
if is_mulaw_quantize(hparams.input_type):
initial_input = np_utils.to_categorical(
initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
initial_input = Variable(torch.from_numpy(initial_input)).view(
1, 1, hparams.quantize_channels)
else:
initial_input = Variable(torch.zeros(1, 1, 1).fill_(initial_value))
# Move to the GPU only when `use_cuda` is set.
initial_input = initial_input.cuda() if use_cuda else initial_input
# Generate `length` steps starting from the initial input, passing the
# optional conditioning features; the call returns two values here.
y_hat, c_hat = model.incremental_forward(
initial_input, c=c, g=g, T=length, tqdm=tqdm, softmax=True, quantize=True,
log_scale_min=hparams.log_scale_min)
# NOTE(review): excerpt only - the enclosing definition is outside this view
# and the original indentation has been lost.
# Scale `wav` so that its peak absolute value equals hparams.rescaling_max.
# NOTE(review): an all-zero `wav` makes the divisor 0 - confirm callers
# exclude that case.
wav = wav / np.abs(wav).max() * hparams.rescaling_max
# Mu-law quantize
# Choose the output encoding from hparams.input_type. Each branch also sets
# `constant_values` (the encoding of a zero sample) and `out_dtype`;
# `out_dtype` is not used in the visible lines.
if is_mulaw_quantize(hparams.input_type):
# [0, quantize_channels)
out = P.mulaw_quantize(wav, hparams.quantize_channels)
# Trim silences
# The indices are computed on the quantized signal and applied to both arrays.
start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
wav = wav[start:end]
out = out[start:end]
constant_values = P.mulaw_quantize(0, hparams.quantize_channels)
out_dtype = np.int16
elif is_mulaw(hparams.input_type):
# [-1, 1]
out = P.mulaw(wav, hparams.quantize_channels)
constant_values = P.mulaw(0.0, hparams.quantize_channels)
out_dtype = np.float32
else:
# [-1, 1]
out = wav
constant_values = 0.0
out_dtype = np.float32
# Compute a mel-scale spectrogram from the trimmed wav:
# (N, D)
# NOTE(review): in the visible lines `wav` is trimmed only in the
# mu-law-quantize branch; in the other branches it is used as rescaled.
mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T
# lws pads zeros internally before performing stft
# this is needed to adjust time resolution between audio and mel-spectrogram
# `l`, `r` are presumably left/right padding lengths - confirm against
# audio.lws_pad_lr.
l, r = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())
# zero pad for quantized signal