How to use nnmnkwii - 10 common examples

To help you get started, we’ve selected a few nnmnkwii examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github r9y9 / nnmnkwii / tests / test_preprocessing.py View on Github external
for mu in [128, 256, 512]:
        for x, y in [(-1.0, 0), (0.0, mu // 2), (0.99999, mu - 1)]:
            y_hat = P.mulaw_quantize(x, mu)
            err = np.abs(x - P.inv_mulaw_quantize(y_hat, mu))
            print(y, y_hat, err)
            assert np.allclose(y, y_hat)
            # have small quantize error
            assert err <= 0.1

    # ndarray input
    for mu in [128, 256, 512]:
        x = np.random.rand(10)
        y = P.mulaw(x, mu)
        x_hat = P.inv_mulaw(y, mu)
        assert np.allclose(x, x_hat)
        P.inv_mulaw_quantize(P.mulaw_quantize(x))

    # torch array input
    from warnings import warn
    import torch
    torch.manual_seed(1234)
    for mu in [128, 256, 512]:
        x = torch.rand(10)
        y = P.mulaw(x, mu)
        x_hat = P.inv_mulaw(y, mu)
        assert np.allclose(x, x_hat)
        P.inv_mulaw_quantize(P.mulaw_quantize(x))
github r9y9 / nnmnkwii / tests / test_preprocessing.py View on Github external
assert P.mulaw_quantize(-1.0, 2) == 0
    assert P.mulaw_quantize(-0.5, 2) == 0
    assert P.mulaw_quantize(-0.001, 2) == 0
    assert P.mulaw_quantize(0.0, 2) == 1
    assert P.mulaw_quantize(0.0001, 2) == 1
    assert P.mulaw_quantize(0.5, 2) == 1
    assert P.mulaw_quantize(0.99999, 2) == 1
    assert P.mulaw_quantize(1.0, 2) == 2

    np.random.seed(1234)
    # forward/backward correctness
    for mu in [128, 256, 512]:
        for x in np.random.rand(100):
            y = P.mulaw(x, mu)
            assert y >= 0 and y <= 1
            x_hat = P.inv_mulaw(y, mu)
            assert np.allclose(x, x_hat)

    # forward/backward correctness for quantize
    for mu in [128, 256, 512]:
        for x, y in [(-1.0, 0), (0.0, mu // 2), (0.99999, mu - 1)]:
            y_hat = P.mulaw_quantize(x, mu)
            err = np.abs(x - P.inv_mulaw_quantize(y_hat, mu))
            print(y, y_hat, err)
            assert np.allclose(y, y_hat)
            # have small quantize error
            assert err <= 0.1

    # ndarray input
    for mu in [128, 256, 512]:
        x = np.random.rand(10)
        y = P.mulaw(x, mu)
github r9y9 / nnmnkwii / tests / test_real_datasets.py View on Github external
# Multi speakers
    data_source = MyFileDataSource(
        DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files)
    X = FileSourceDataset(data_source)
    assert len(X) == max_files

    # Speaker labels
    Y = data_source.labels
    assert np.all(Y[:max_files // 2] == 0)
    assert np.all(Y[max_files // 2:] == 1)

    # Custom speaker id
    data_source = MyFileDataSource(
        DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files,
        labelmap={"SF1": 1, "SF2": 0})
    X = FileSourceDataset(data_source)
    Y = data_source.labels
    assert np.all(Y[:max_files // 2] == 1)
    assert np.all(Y[max_files // 2:] == 0)

    # Use all data
    data_source = MyFileDataSource(
        DATA_DIR, speakers=["SF1", "SF2"], max_files=None)
    X = FileSourceDataset(data_source)
    assert len(X) == 162 * 2
github r9y9 / wavenet_vocoder / tests / test_model.py View on Github external
if returns_power:
        # (1 x N')
        p = librosa.feature.rmse(x, frame_length=256, hop_length=128)
        upsample_factor = x.size // p.size
        # (1 x N)
        p = np.repeat(p, upsample_factor, axis=-1)
        if p.size < x.size:
            # pad against time axis
            p = np.pad(p, [(0, 0), (0, x.size - p.size)], mode="constant", constant_values=0)

        # shape adjust
        p = p.reshape(1, 1, -1)

    # (T,)
    if mulaw:
        x = P.mulaw_quantize(x)
        x_org = P.inv_mulaw_quantize(x)
        # (C, T)
        x = to_categorical(x, num_classes=256).T
        # (1, C, T)
        x = x.reshape(1, 256, -1).astype(np.float32)
    else:
        x_org = x
        x = x.reshape(1, 1, -1)

    if returns_power:
        return x, x_org, p

    return x, x_org
github r9y9 / nnmnkwii / tests / test_preprocessing.py View on Github external
def test_mulaw():
    # Check corner cases
    assert P.mulaw_quantize(-1.0, 2) == 0
    assert P.mulaw_quantize(-0.5, 2) == 0
    assert P.mulaw_quantize(-0.001, 2) == 0
    assert P.mulaw_quantize(0.0, 2) == 1
    assert P.mulaw_quantize(0.0001, 2) == 1
    assert P.mulaw_quantize(0.5, 2) == 1
    assert P.mulaw_quantize(0.99999, 2) == 1
    assert P.mulaw_quantize(1.0, 2) == 2

    np.random.seed(1234)
    # forward/backward correctness
    for mu in [128, 256, 512]:
        for x in np.random.rand(100):
            y = P.mulaw(x, mu)
            assert y >= 0 and y <= 1
            x_hat = P.inv_mulaw(y, mu)
            assert np.allclose(x, x_hat)

    # forward/backward correctness for quantize
    for mu in [128, 256, 512]:
        for x, y in [(-1.0, 0), (0.0, mu // 2), (0.99999, mu - 1)]:
            y_hat = P.mulaw_quantize(x, mu)
            err = np.abs(x - P.inv_mulaw_quantize(y_hat, mu))
            print(y, y_hat, err)
            assert np.allclose(y, y_hat)
            # have small quantize error
            assert err <= 0.1

    # ndarray input
    for mu in [128, 256, 512]:
github r9y9 / nnmnkwii / tests / test_real_datasets.py View on Github external
X = FileSourceDataset(data_source)
    assert len(X) == max_files
    Y = data_source.labels
    assert np.all(Y[:max_files // 2] == 0)
    assert np.all(Y[max_files // 2:] == 1)

    # Custom labelmap
    data_source = MyTextDataSource(DATA_DIR, speakers=["225", "228"],
                                   labelmap={"225": 225, "228": 228})
    X = FileSourceDataset(data_source)
    labels = data_source.labels
    assert len(X) == len(labels)
    assert (labels[:n_225] == 225).all()
    assert (labels[n_225:] == 228).all()

    class MyWavFileDataSource(vctk.WavFileDataSource):
        """VCTK wav data source whose features are computed from each wav file.

        ``collect_features`` reads one wav file, runs the pyworld analysis
        chain (``dio`` -> ``stonemask`` -> ``cheaptrick``) and converts the
        result with ``pysptk.sp2mc`` (order 24).
        """

        def __init__(self, data_root, speakers, labelmap=None):
            """Initialise the parent data source and store the ``alpha`` value.

            Args:
                data_root: Forwarded unchanged to the parent constructor.
                speakers: Forwarded unchanged to the parent constructor.
                labelmap: Forwarded unchanged to the parent constructor.
            """
            super(MyWavFileDataSource, self).__init__(data_root, speakers, labelmap)
            # Value returned by pysptk for 48000; it must agree with the
            # sampling rate asserted in ``collect_features``.
            self.alpha = pysptk.util.mcepalpha(48000)

        def collect_features(self, path):
            """Compute the feature matrix for the wav file at ``path``.

            Args:
                path: Location of the wav file, handed to ``wavfile.read``.

            Returns:
                The output of ``pysptk.sp2mc`` cast to ``np.float32``.

            Raises:
                AssertionError: If the file's sampling rate is not 48000.
            """
            sample_rate, samples = wavfile.read(path)
            assert sample_rate == 48000

            # The pyworld calls below are fed float64 samples.
            waveform = samples.astype(np.float64)

            # F0 from ``dio`` is passed through ``stonemask`` before being
            # used by ``cheaptrick``.
            raw_f0, frame_times = pyworld.dio(waveform, sample_rate, frame_period=5)
            refined_f0 = pyworld.stonemask(waveform, raw_f0, frame_times, sample_rate)

            envelope = trim_zeros_frames(
                pyworld.cheaptrick(waveform, refined_f0, frame_times, sample_rate))

            mel_cepstrum = pysptk.sp2mc(envelope, order=24, alpha=self.alpha)
            return mel_cepstrum.astype(np.float32)

    data_source = MyWavFileDataSource(DATA_DIR, speakers=["225"])
github mertcokluk / GlotNET / train.py View on Github external
if c is not None:
        if hparams.upsample_conditional_features:
            c = c[idx, :, :length // audio.get_hop_size()].unsqueeze(0)
        else:
            c = c[idx, :, :length].unsqueeze(0)
        assert c.dim() == 3
        print("Shape of local conditioning features: {}".format(c.size()))
    if g is not None:
        # TODO: test
        g = g[idx]
        print("Shape of global conditioning features: {}".format(g.size()))

    # Dummy silence
    if is_mulaw_quantize(hparams.input_type):
        initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
    elif is_mulaw(hparams.input_type):
        initial_value = P.mulaw(0.0, hparams.quantize_channels)
    else:
        initial_value = 0.0
    print("Intial value:", initial_value)

    # (C,)
    if is_mulaw_quantize(hparams.input_type):
        initial_input = np_utils.to_categorical(
            initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
        initial_input = torch.from_numpy(initial_input).view(
            1, 1, hparams.quantize_channels)
    else:
        initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
    initial_input = initial_input.to(device)
github r9y9 / wavenet_vocoder / train.py View on Github external
if c is not None:
        if hparams.upsample_conditional_features:
            c = c[idx, :, :length // audio.get_hop_size()].unsqueeze(0)
        else:
            c = c[idx, :, :length].unsqueeze(0)
        assert c.dim() == 3
        print("Shape of local conditioning features: {}".format(c.size()))
    if g is not None:
        # TODO: test
        g = g[idx]
        print("Shape of global conditioning features: {}".format(g.size()))

    # Dummy silence
    if is_mulaw_quantize(hparams.input_type):
        initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
    elif is_mulaw(hparams.input_type):
        initial_value = P.mulaw(0.0, hparams.quantize_channels)
    else:
        initial_value = 0.0
    print("Intial value:", initial_value)

    # (C,)
    if is_mulaw_quantize(hparams.input_type):
        initial_input = np_utils.to_categorical(
            initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
        initial_input = torch.from_numpy(initial_input).view(
            1, 1, hparams.quantize_channels)
    else:
        initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
    initial_input = initial_input.to(device)
github mertcokluk / GlotNET / train.py View on Github external
if hparams.upsample_conditional_features:
            c = c[idx, :, :length // audio.get_hop_size()].unsqueeze(0)
        else:
            c = c[idx, :, :length].unsqueeze(0)
        assert c.dim() == 3
        print("Shape of local conditioning features: {}".format(c.size()))
    if g is not None:
        # TODO: test
        g = g[idx]
        print("Shape of global conditioning features: {}".format(g.size()))

    # Dummy silence
    if is_mulaw_quantize(hparams.input_type):
        initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
    elif is_mulaw(hparams.input_type):
        initial_value = P.mulaw(0.0, hparams.quantize_channels)
    else:
        initial_value = 0.0
    print("Intial value:", initial_value)

    # (C,)
    if is_mulaw_quantize(hparams.input_type):
        initial_input = np_utils.to_categorical(
            initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
        initial_input = torch.from_numpy(initial_input).view(
            1, 1, hparams.quantize_channels)
    else:
        initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
    initial_input = initial_input.to(device)

    # Run the model in fast eval mode
    with torch.no_grad():
github r9y9 / wavenet_vocoder / train.py View on Github external
if hparams.upsample_conditional_features:
            c = c[idx, :, :length // audio.get_hop_size()].unsqueeze(0)
        else:
            c = c[idx, :, :length].unsqueeze(0)
        assert c.dim() == 3
        print("Shape of local conditioning features: {}".format(c.size()))
    if g is not None:
        # TODO: test
        g = g[idx]
        print("Shape of global conditioning features: {}".format(g.size()))

    # Dummy silence
    if is_mulaw_quantize(hparams.input_type):
        initial_value = P.mulaw_quantize(0, hparams.quantize_channels)
    elif is_mulaw(hparams.input_type):
        initial_value = P.mulaw(0.0, hparams.quantize_channels)
    else:
        initial_value = 0.0
    print("Intial value:", initial_value)

    # (C,)
    if is_mulaw_quantize(hparams.input_type):
        initial_input = np_utils.to_categorical(
            initial_value, num_classes=hparams.quantize_channels).astype(np.float32)
        initial_input = torch.from_numpy(initial_input).view(
            1, 1, hparams.quantize_channels)
    else:
        initial_input = torch.zeros(1, 1, 1).fill_(initial_value)
    initial_input = initial_input.to(device)

    # Run the model in fast eval mode
    with torch.no_grad():