How to use the nnmnkwii.datasets.jsut module in nnmnkwii

To help you get started, we’ve selected a few nnmnkwii examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hash2430 / dv3_world / tests / test_frontend.py View on Github external
def test_ja_jsut():
    """Run every JSUT transcription through the "jp" frontend and back.

    For each value of ``p`` in (0.0, 0.5, 1.0), every transcription is encoded
    with ``text_to_sequence`` and the resulting sequence must end with ``eos``.
    The sequence is then decoded with ``sequence_to_text``; the decoded text
    is not asserted on, only printed for the first ten items so it can be
    inspected by eye.
    """
    jp_frontend = getattr(frontend, "jp")
    from nnmnkwii.datasets import jsut
    from tqdm import trange
    import jaconv  # not referenced below; kept so the import itself is still exercised

    transcription_source = jsut.TranscriptionDataSource(
        "/home/ryuichi/data/jsut_ver1.1/", subsets=jsut.available_subsets)
    texts = transcription_source.collect_files()
    num_texts = len(texts)

    for p in (0.0, 0.5, 1.0):
        # trange gives one progress bar per value of p.
        for idx in trange(num_texts):
            original = texts[idx]
            encoded = jp_frontend.text_to_sequence(original, p=p)
            assert encoded[-1] == eos
            decoded = jp_frontend.sequence_to_text(encoded)

            # Only the first few round trips are echoed.
            if idx >= 10:
                continue
            print("""{0}: {1}\n{0}: {2}\n""".format(idx, original, decoded))
github r9y9 / nnmnkwii / tests / test_real_datasets.py View on Github external
def test_jsut_dummy():
    """Check that both JSUT data sources raise RuntimeError for a bogus root."""
    candidates = (jsut.TranscriptionDataSource, jsut.WavFileDataSource)

    # The helper does not depend on the loop variable, so it is defined once.
    # Its name stays ``f`` because ``raises`` reports the wrapped function's
    # name when the expected exception is not raised.
    @raises(RuntimeError)
    def f(source):
        source("dummy")

    for candidate in candidates:
        f(candidate)
github hash2430 / dv3_world / tests / test_frontend.py View on Github external
def test_ja_jsut():
    """Run every JSUT transcription through the "jp" frontend and back.

    For each value of ``p`` in (0.0, 0.5, 1.0), every transcription is encoded
    with ``text_to_sequence`` and the resulting sequence must end with ``eos``.
    The sequence is then decoded with ``sequence_to_text``; the decoded text
    is not asserted on, only printed for the first ten items so it can be
    inspected by eye.
    """
    jp_frontend = getattr(frontend, "jp")
    from nnmnkwii.datasets import jsut
    from tqdm import trange
    import jaconv  # not referenced below; kept so the import itself is still exercised

    transcription_source = jsut.TranscriptionDataSource(
        "/home/ryuichi/data/jsut_ver1.1/", subsets=jsut.available_subsets)
    texts = transcription_source.collect_files()
    num_texts = len(texts)

    for p in (0.0, 0.5, 1.0):
        # trange gives one progress bar per value of p.
        for idx in trange(num_texts):
            original = texts[idx]
            encoded = jp_frontend.text_to_sequence(original, p=p)
            assert encoded[-1] == eos
            decoded = jp_frontend.sequence_to_text(encoded)

            # Only the first few round trips are echoed.
            if idx >= 10:
                continue
            print("""{0}: {1}\n{0}: {2}\n""".format(idx, original, decoded))
github r9y9 / nnmnkwii / tests / test_real_datasets.py View on Github external
assert X2[0] == u"あなたの荷物は、ロサンゼルスに残っています。"

    # Multiple subsets
    data_source = MyTextDataSource(DATA_DIR, subsets=["basic5000", "travel1000"])
    X3 = FileSourceDataset(data_source)
    assert X3[0] == u"水をマレーシアから買わなくてはならないのです。"
    assert len(X3) == len(X1) + len(X2)

    # All subsets
    data_source = MyTextDataSource(DATA_DIR, subsets=jsut.available_subsets)
    X = FileSourceDataset(data_source)
    # As of 2017/11/2. There were 30 missing wav files.
    # This should be 7696
    assert len(X) == 7696

    class MyWavFileDataSource(jsut.WavFileDataSource):
        """JSUT wav data source that turns each wav file into a float32 feature matrix.

        Features are produced by a pyworld analysis (dio, stonemask,
        cheaptrick) followed by ``pysptk.sp2mc`` with order 24.
        """

        def __init__(self, data_root, subsets):
            super(MyWavFileDataSource, self).__init__(data_root, subsets)
            # Alpha value pysptk provides for 48000 Hz audio; it matches the
            # sampling rate asserted in collect_features.
            self.alpha = pysptk.util.mcepalpha(48000)

        def collect_features(self, path):
            """Read the wav file at ``path`` and return its sp2mc features as float32.

            Raises AssertionError when the file's sampling rate is not 48000.
            """
            sample_rate, samples = wavfile.read(path)
            assert sample_rate == 48000
            # The pyworld calls below are fed float64 samples.
            signal = samples.astype(np.float64)
            raw_f0, times = pyworld.dio(signal, sample_rate, frame_period=5)
            refined_f0 = pyworld.stonemask(signal, raw_f0, times, sample_rate)
            envelope = trim_zeros_frames(
                pyworld.cheaptrick(signal, refined_f0, times, sample_rate))
            mel_cepstrum = pysptk.sp2mc(envelope, order=24, alpha=self.alpha)
            return mel_cepstrum.astype(np.float32)

    data_source = MyWavFileDataSource(DATA_DIR, subsets=["basic5000"])
github r9y9 / deepvoice3_pytorch / tests / test_frontend.py View on Github external
@attr("local_only")
def test_ja_jsut():
    """Run every JSUT transcription through the "jp" frontend and back.

    For each value of ``p`` in (0.0, 0.5, 1.0), every transcription is encoded
    with ``text_to_sequence`` and the resulting sequence must end with ``eos``.
    The sequence is then decoded with ``sequence_to_text``; the decoded text
    is not asserted on, only printed for the first ten items so it can be
    inspected by eye.
    """
    jp_frontend = getattr(frontend, "jp")
    from nnmnkwii.datasets import jsut
    from tqdm import trange
    import jaconv  # not referenced below; kept so the import itself is still exercised

    transcription_source = jsut.TranscriptionDataSource(
        "/home/ryuichi/data/jsut_ver1.1/", subsets=jsut.available_subsets)
    texts = transcription_source.collect_files()
    num_texts = len(texts)

    for p in (0.0, 0.5, 1.0):
        # trange gives one progress bar per value of p.
        for idx in trange(num_texts):
            original = texts[idx]
            encoded = jp_frontend.text_to_sequence(original, p=p)
            assert encoded[-1] == eos
            decoded = jp_frontend.sequence_to_text(encoded)

            # Only the first few round trips are echoed.
            if idx >= 10:
                continue
            print("""{0}: {1}\n{0}: {2}\n""".format(idx, original, decoded))
github Sharad24 / Neural-Voice-Cloning-with-Few-Samples / dv3 / jsut.py View on Github external
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Process every JSUT utterance found under ``in_dir`` with a process pool.

    Transcriptions and wav paths are collected for all
    ``jsut.available_subsets`` and paired positionally. Each pair is passed to
    ``_process_utterance`` together with ``out_dir`` and a 1-based index.

    Args:
        in_dir: Root directory handed to the JSUT data sources.
        out_dir: Forwarded unchanged to ``_process_utterance``.
        num_workers: Maximum number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures while results are
            gathered (e.g. a progress bar). Defaults to the identity.

    Returns:
        A list holding the result of ``_process_utterance`` for each
        utterance, in submission order.
    """
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # The context manager shuts the pool down even when a task raises, so the
    # worker processes are not left behind.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # NOTE(review): zip stops at the shorter list, so both collections are
        # assumed to be aligned item by item - confirm against the data sources.
        futures = [
            executor.submit(_process_utterance, out_dir, index, wav_path, text)
            for index, (text, wav_path)
            in enumerate(zip(transcriptions, wav_paths), start=1)
        ]
        # Results are gathered inside the ``with`` so the ``tqdm`` wrapper
        # still sees tasks completing one at a time.
        return [future.result() for future in tqdm(futures)]
github r9y9 / deepvoice3_pytorch / jsut.py View on Github external
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Process every JSUT utterance found under ``in_dir`` with a process pool.

    Transcriptions and wav paths are collected for all
    ``jsut.available_subsets`` and paired positionally. Each pair is passed to
    ``_process_utterance`` together with ``out_dir`` and a 1-based index.

    Args:
        in_dir: Root directory handed to the JSUT data sources.
        out_dir: Forwarded unchanged to ``_process_utterance``.
        num_workers: Maximum number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures while results are
            gathered (e.g. a progress bar). Defaults to the identity.

    Returns:
        A list holding the result of ``_process_utterance`` for each
        utterance, in submission order.
    """
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # The context manager shuts the pool down even when a task raises, so the
    # worker processes are not left behind.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # NOTE(review): zip stops at the shorter list, so both collections are
        # assumed to be aligned item by item - confirm against the data sources.
        futures = [
            executor.submit(_process_utterance, out_dir, index, wav_path, text)
            for index, (text, wav_path)
            in enumerate(zip(transcriptions, wav_paths), start=1)
        ]
        # Results are gathered inside the ``with`` so the ``tqdm`` wrapper
        # still sees tasks completing one at a time.
        return [future.result() for future in tqdm(futures)]
github r9y9 / deepvoice3_pytorch / jsut.py View on Github external
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Process every JSUT utterance found under ``in_dir`` with a process pool.

    Transcriptions and wav paths are collected for all
    ``jsut.available_subsets`` and paired positionally. Each pair is passed to
    ``_process_utterance`` together with ``out_dir`` and a 1-based index.

    Args:
        in_dir: Root directory handed to the JSUT data sources.
        out_dir: Forwarded unchanged to ``_process_utterance``.
        num_workers: Maximum number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures while results are
            gathered (e.g. a progress bar). Defaults to the identity.

    Returns:
        A list holding the result of ``_process_utterance`` for each
        utterance, in submission order.
    """
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # The context manager shuts the pool down even when a task raises, so the
    # worker processes are not left behind.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # NOTE(review): zip stops at the shorter list, so both collections are
        # assumed to be aligned item by item - confirm against the data sources.
        futures = [
            executor.submit(_process_utterance, out_dir, index, wav_path, text)
            for index, (text, wav_path)
            in enumerate(zip(transcriptions, wav_paths), start=1)
        ]
        # Results are gathered inside the ``with`` so the ``tqdm`` wrapper
        # still sees tasks completing one at a time.
        return [future.result() for future in tqdm(futures)]
github r9y9 / wavenet_vocoder / jsut.py View on Github external
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Process every JSUT utterance found under ``in_dir`` with a process pool.

    Transcriptions and wav paths are collected for all
    ``jsut.available_subsets`` and paired positionally. Each pair is passed to
    ``_process_utterance`` together with ``out_dir`` and a 1-based index.

    Args:
        in_dir: Root directory handed to the JSUT data sources.
        out_dir: Forwarded unchanged to ``_process_utterance``.
        num_workers: Maximum number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures while results are
            gathered (e.g. a progress bar). Defaults to the identity.

    Returns:
        A list holding the result of ``_process_utterance`` for each
        utterance, in submission order.
    """
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # The context manager shuts the pool down even when a task raises, so the
    # worker processes are not left behind.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # NOTE(review): zip stops at the shorter list, so both collections are
        # assumed to be aligned item by item - confirm against the data sources.
        futures = [
            executor.submit(_process_utterance, out_dir, index, wav_path, text)
            for index, (text, wav_path)
            in enumerate(zip(transcriptions, wav_paths), start=1)
        ]
        # Results are gathered inside the ``with`` so the ``tqdm`` wrapper
        # still sees tasks completing one at a time.
        return [future.result() for future in tqdm(futures)]
github r9y9 / wavenet_vocoder / jsut.py View on Github external
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Process every JSUT utterance found under ``in_dir`` with a process pool.

    Transcriptions and wav paths are collected for all
    ``jsut.available_subsets`` and paired positionally. Each pair is passed to
    ``_process_utterance`` together with ``out_dir`` and a 1-based index.

    Args:
        in_dir: Root directory handed to the JSUT data sources.
        out_dir: Forwarded unchanged to ``_process_utterance``.
        num_workers: Maximum number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures while results are
            gathered (e.g. a progress bar). Defaults to the identity.

    Returns:
        A list holding the result of ``_process_utterance`` for each
        utterance, in submission order.
    """
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # The context manager shuts the pool down even when a task raises, so the
    # worker processes are not left behind.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # NOTE(review): zip stops at the shorter list, so both collections are
        # assumed to be aligned item by item - confirm against the data sources.
        futures = [
            executor.submit(_process_utterance, out_dir, index, wav_path, text)
            for index, (text, wav_path)
            in enumerate(zip(transcriptions, wav_paths), start=1)
        ]
        # Results are gathered inside the ``with`` so the ``tqdm`` wrapper
        # still sees tasks completing one at a time.
        return [future.result() for future in tqdm(futures)]