How to use the nnmnkwii.datasets.vctk.WavFileDataSource class in nnmnkwii

To help you get started, we've selected a few nnmnkwii examples based on popular ways the library is used in public projects.
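WavFileDataSource provides collect_files for the VCTK corpus but leaves feature extraction to you: subclass it, implement collect_features, and wrap the result in a FileSourceDataset. Below is a minimal sketch; the data root /path/to/VCTK-Corpus and the class name RawWavDataSource are placeholders, not part of nnmnkwii.

from scipy.io import wavfile

from nnmnkwii.datasets import FileSourceDataset, vctk


class RawWavDataSource(vctk.WavFileDataSource):
    # collect_files is inherited from WavFileDataSource;
    # only the per-file feature extraction is defined here.
    def collect_features(self, path):
        fs, x = wavfile.read(path)
        return x


data_source = RawWavDataSource("/path/to/VCTK-Corpus", speakers=["225"])
X = FileSourceDataset(data_source)  # lazy, indexable dataset of waveforms
print(len(X), X[0].shape)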

github r9y9 / nnmnkwii / tests / test_real_datasets.py
    X = FileSourceDataset(data_source)
    assert len(X) == max_files
    Y = data_source.labels
    assert np.all(Y[:max_files // 2] == 0)
    assert np.all(Y[max_files // 2:] == 1)

    # Custom labelmap
    data_source = MyTextDataSource(DATA_DIR, speakers=["225", "228"],
                                   labelmap={"225": 225, "228": 228})
    X = FileSourceDataset(data_source)
    labels = data_source.labels
    assert len(X) == len(labels)
    assert (labels[:n_225] == 225).all()
    assert (labels[n_225:] == 228).all()

    class MyWavFileDataSource(vctk.WavFileDataSource):
        def __init__(self, data_root, speakers, labelmap=None):
            super(MyWavFileDataSource, self).__init__(data_root, speakers, labelmap)
            self.alpha = pysptk.util.mcepalpha(48000)

        def collect_features(self, path):
            fs, x = wavfile.read(path)
            assert fs == 48000
            x = x.astype(np.float64)
            f0, timeaxis = pyworld.dio(x, fs, frame_period=5)
            f0 = pyworld.stonemask(x, f0, timeaxis, fs)
            spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
            spectrogram = trim_zeros_frames(spectrogram)
            mc = pysptk.sp2mc(spectrogram, order=24, alpha=self.alpha)
            return mc.astype(np.float32)

    data_source = MyWavFileDataSource(DATA_DIR, speakers=["225"])
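
This subclassing pattern is the intended use of WavFileDataSource: collect_files is inherited, while collect_features defines the acoustic features, here a WORLD spectral envelope reduced to a 25-dimensional mel-cepstrum (order=24) at 48 kHz.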
github r9y9 / nnmnkwii / tests / test_real_datasets.py
def test_vctk_dummy():
    assert len(vctk.available_speakers) == 108
    data_sources = [vctk.TranscriptionDataSource,
                    vctk.WavFileDataSource]

    for data_source in data_sources:
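        # Constructing a data source with a bogus data root should raise RuntimeError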
        @raises(RuntimeError)
        def f(source):
            source("dummy")

        f(data_source)
github Sharad24 / Neural-Voice-Cloning-with-Few-Samples / dv3 / vctk.py
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []

    speakers = vctk.available_speakers

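    # Transcriptions, labels, and wav paths come back in the same
    # speaker-by-speaker order, so they can be zipped below.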
    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    wav_paths = vctk.WavFileDataSource(
        in_dir, speakers=speakers).collect_files()

    for index, (speaker_id, text, wav_path) in enumerate(
            zip(speaker_ids, transcriptions, wav_paths)):
        futures.append(executor.submit(
            partial(_process_utterance, out_dir, index + 1, speaker_id, wav_path, text)))
    return [future.result() for future in tqdm(futures)]

The same build_from_path implementation appears verbatim in Sharad24 / Neural-Voice-Cloning-with-Few-Samples / deepvoice3_pytorch / vctk.py and in r9y9 / deepvoice3_pytorch / vctk.py.

github hash2430 / dv3_world / vctk.py
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []

    speakers = vctk.available_speakers

    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    speaker_ids_unique = np.unique(speaker_ids)
    speaker_to_speaker_id = {}
    for i, j in zip(speakers, speaker_ids_unique):
        speaker_to_speaker_id[i] = j
    wav_paths = vctk.WavFileDataSource(
        in_dir, speakers=speakers).collect_files()

    _ignore_speaker = hparams.not_for_train_speaker.split(", ")
    ignore_speaker = [speaker_to_speaker_id[i] for i in _ignore_speaker]
    for index, (speaker_id, text, wav_path) in enumerate(
            zip(speaker_ids, transcriptions, wav_paths)):
        if speaker_id in ignore_speaker:
            continue
        futures.append(executor.submit(
            partial(_process_utterance, out_dir, index + 1, speaker_id, wav_path, text)))
    return [future.result() for future in tqdm(futures)]
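
This variant excludes held-out speakers. Since td.labels holds numeric label ids rather than speaker names, the code first builds a name-to-id mapping so that the speakers listed in hparams.not_for_train_speaker can be skipped.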
github r9y9 / deepvoice3_pytorch / vctk_preprocess / prepare_htk_alignments_vctk.py
    args = parser.parse_args()

    log_level = args.log.upper()
    logging.getLogger().setLevel(log_level)
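    # Disfluency tokens for the gentle forced aligner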
    disfluencies = set(['uh', 'um'])

    data_root = args.data_root

    # Do for all speakers
    speakers = vctk.available_speakers

    # Collect all transcripts/wav files
    td = vctk.TranscriptionDataSource(data_root, speakers=speakers)
    transcriptions = td.collect_files()
    wav_paths = vctk.WavFileDataSource(
        data_root, speakers=speakers).collect_files()

    # Save dir
    save_dir = join(data_root, "lab")
    if not exists(save_dir):
        os.makedirs(save_dir)

    resources = gentle.Resources()

    for idx in tqdm(range(len(wav_paths))):
        transcript = transcriptions[idx]
        audiofile = wav_paths[idx]
        lab_path = audiofile.replace("wav48/", "lab/").replace(".wav", ".lab")
        print(transcript)
        print(audiofile)
        print(lab_path)
github r9y9 / nnmnkwii / nnmnkwii / datasets / vctk.py
    def collect_files(self):
        return super(WavFileDataSource, self).collect_files(True)
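
The True argument tells the shared VCTK base class to collect wav files (under wav48/, as the paths in the alignment example above suggest) rather than the transcription files used by TranscriptionDataSource.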