# Assumed imports for these snippets: numpy, and nnmnkwii's preprocessing
# module aliased as P (as in nnmnkwii's own tests).
import numpy as np
from nnmnkwii import preprocessing as P


def test_mulaw():
    # Check corner cases
    assert P.mulaw_quantize(-1.0, 2) == 0
    assert P.mulaw_quantize(-0.5, 2) == 0
    assert P.mulaw_quantize(-0.001, 2) == 0
    assert P.mulaw_quantize(0.0, 2) == 1
    assert P.mulaw_quantize(0.0001, 2) == 1
    assert P.mulaw_quantize(0.5, 2) == 1
    assert P.mulaw_quantize(0.99999, 2) == 1
    assert P.mulaw_quantize(1.0, 2) == 2

    np.random.seed(1234)

    # forward/backward correctness
    for mu in [128, 256, 512]:
        for x in np.random.rand(100):
            y = P.mulaw(x, mu)
            assert y >= 0 and y <= 1
            x_hat = P.inv_mulaw(y, mu)
            assert np.allclose(x, x_hat)

    # forward/backward correctness for quantize
    for mu in [128, 256, 512]:
        for x, y in [(-1.0, 0), (0.0, mu // 2), (0.99999, mu - 1)]:
            y_hat = P.mulaw_quantize(x, mu)
            err = np.abs(x - P.inv_mulaw_quantize(y_hat, mu))
            print(y, y_hat, err)
            assert np.allclose(y, y_hat)
            # have small quantize error
            assert err <= 0.1

    # ndarray input
    for mu in [128, 256, 512]:
        x = np.random.rand(10)
        y = P.mulaw(x, mu)
        x_hat = P.inv_mulaw(y, mu)
        assert np.allclose(x, x_hat)
        P.inv_mulaw_quantize(P.mulaw_quantize(x))

    # torch array input
    from warnings import warn
    import torch
    torch.manual_seed(1234)
    for mu in [128, 256, 512]:
        x = torch.rand(10)
        y = P.mulaw(x, mu)
        x_hat = P.inv_mulaw(y, mu)
        assert np.allclose(x, x_hat)
        P.inv_mulaw_quantize(P.mulaw_quantize(x))
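
# A minimal usage sketch (not part of the tests above): mu-law companding an
# arbitrary waveform in [-1, 1] into 256 discrete classes and back, as done
# for categorical WaveNet inputs. The sine tone is a stand-in signal.
x = np.sin(2 * np.pi * 440 * np.arange(0, 0.01, 1.0 / 16000))
ids = P.mulaw_quantize(x, 256)            # (T,) integer class indices
x_hat = P.inv_mulaw_quantize(ids, 256)    # back to a float waveform in [-1, 1]
print("max mu-law round-trip error:", np.abs(x - x_hat).max())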
# Multi speakers
data_source = MyFileDataSource(
    DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files)
X = FileSourceDataset(data_source)
assert len(X) == max_files

# Speaker labels
Y = data_source.labels
assert np.all(Y[:max_files // 2] == 0)
assert np.all(Y[max_files // 2:] == 1)

# Custom speaker id
data_source = MyFileDataSource(
    DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files,
    labelmap={"SF1": 1, "SF2": 0})
X = FileSourceDataset(data_source)
Y = data_source.labels
assert np.all(Y[:max_files // 2] == 1)
assert np.all(Y[max_files // 2:] == 0)

# Use all data
data_source = MyFileDataSource(
    DATA_DIR, speakers=["SF1", "SF2"], max_files=None)
X = FileSourceDataset(data_source)
assert len(X) == 162 * 2
# Fragment of a test-data helper: optionally compute frame power, then
# mu-law quantize / one-hot encode the waveform x.
if returns_power:
    # (1 x N')
    # NOTE: librosa >= 0.8 renamed this to librosa.feature.rms(y=x, ...)
    p = librosa.feature.rmse(x, frame_length=256, hop_length=128)
    upsample_factor = x.size // p.size
    # (1 x N)
    p = np.repeat(p, upsample_factor, axis=-1)
    if p.size < x.size:
        # pad against time axis
        p = np.pad(p, [(0, 0), (0, x.size - p.size)],
                   mode="constant", constant_values=0)
    # shape adjust
    p = p.reshape(1, 1, -1)

# (T,)
if mulaw:
    x = P.mulaw_quantize(x)
    x_org = P.inv_mulaw_quantize(x)
    # (C, T)
    x = to_categorical(x, num_classes=256).T
    # (1, C, T)
    x = x.reshape(1, 256, -1).astype(np.float32)
else:
    x_org = x
    x = x.reshape(1, 1, -1)

if returns_power:
    return x, x_org, p
return x, x_org
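
# A minimal decoding sketch (assumptions: the mulaw branch above ran, so `x`
# is a one-hot array of shape (1, 256, T) and `x_org` is the reference
# waveform; P is nnmnkwii.preprocessing). Recover class indices from the
# one-hot encoding, then invert the mu-law quantization.
class_ids = x.argmax(axis=1).reshape(-1)        # (T,) integer class indices
x_rec = P.inv_mulaw_quantize(class_ids, 256)    # waveform in [-1, 1]
assert np.allclose(x_rec, x_org)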
# Default speaker labels
X = FileSourceDataset(data_source)
assert len(X) == max_files
Y = data_source.labels
assert np.all(Y[:max_files // 2] == 0)
assert np.all(Y[max_files // 2:] == 1)

# Custom labelmap
data_source = MyTextDataSource(DATA_DIR, speakers=["225", "228"],
                               labelmap={"225": 225, "228": 228})
X = FileSourceDataset(data_source)
labels = data_source.labels
assert len(X) == len(labels)
assert (labels[:n_225] == 225).all()
assert (labels[n_225:] == 228).all()
class MyWavFileDataSource(vctk.WavFileDataSource):
    def __init__(self, data_root, speakers, labelmap=None):
        super(MyWavFileDataSource, self).__init__(data_root, speakers, labelmap)
        self.alpha = pysptk.util.mcepalpha(48000)

    def collect_features(self, path):
        fs, x = wavfile.read(path)
        assert fs == 48000
        x = x.astype(np.float64)
        f0, timeaxis = pyworld.dio(x, fs, frame_period=5)
        f0 = pyworld.stonemask(x, f0, timeaxis, fs)
        spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
        spectrogram = trim_zeros_frames(spectrogram)
        mc = pysptk.sp2mc(spectrogram, order=24, alpha=self.alpha)
        return mc.astype(np.float32)


data_source = MyWavFileDataSource(DATA_DIR, speakers=["225"])
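
# A minimal access sketch: wrap the data source in nnmnkwii's FileSourceDataset
# (as done earlier in this file) and pull the first utterance's features.
# Assumption: integer indexing lazily runs collect_features; with order=24,
# sp2mc yields 25 mel-cepstral coefficients per frame.
X = FileSourceDataset(data_source)
mc = X[0]                        # (T, 25) mel-cepstrum of the first utterance
assert mc.shape[-1] == 25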
if c is not None:
    if hparams.upsample_conditional_features:
        c = c[idx, :, :length // audio.get_hop_size()].unsqueeze(0)
    else:
        c = c[idx, :, :length].unsqueeze(0)
    assert c.dim() == 3
    print("Shape of local conditioning features: {}".format(c.size()))

if g is not None:
    # TODO: test
    g = g[idx]
    print("Shape of global conditioning features: {}".format(g.size()))

# Dummy silence
if is_mulaw_quantize(hparams.input_type):
    initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    initial_value = P.mulaw(0.0, hparams.quantize_channels)
else:
    initial_value = 0.0
print("Initial value:", initial_value)

# (C,)
if is_mulaw_quantize(hparams.input_type):
    initial_input = np_utils.to_categorical(
        initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
    initial_input = torch.from_numpy(initial_input).view(
        1, 1, hparams.quantize_channels)
else:
    initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
initial_input = initial_input.to(device)
# Run the model in fast eval mode
with torch.no_grad():
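    # Hypothetical sketch of the generation step (assumption: a
    # wavenet_vocoder-style model exposing incremental_forward(); `model` and
    # `length` come from the surrounding script and are not shown here).
    y_hat = model.incremental_forward(
        initial_input, c=c, g=g, T=length, softmax=True, quantize=True)

# Map the network output back to a waveform, mirroring the input encoding above.
if is_mulaw_quantize(hparams.input_type):
    y_hat = y_hat.max(1)[1].view(-1).long().cpu().numpy()
    y_hat = P.inv_mulaw_quantize(y_hat, hparams.quantize_channels)
elif is_mulaw(hparams.input_type):
    y_hat = P.inv_mulaw(y_hat.view(-1).cpu().numpy(), hparams.quantize_channels)
else:
    y_hat = y_hat.view(-1).cpu().numpy()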