Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Wrap the data source; dataset length equals the number of collected files.
X = FileSourceDataset(data_source)
assert len(X) == max_files
# Default labelmap: speakers are mapped to consecutive integers (0, 1, ...).
# NOTE(review): assumes labels are ordered by speaker, half the files each —
# consistent with the assertions below.
Y = data_source.labels
assert np.all(Y[:max_files // 2] == 0)
assert np.all(Y[max_files // 2:] == 1)
# Custom labelmap: map speaker names directly to caller-chosen integer ids.
data_source = MyTextDataSource(DATA_DIR, speakers=["225", "228"],
                               labelmap={"225": 225, "228": 228})
X = FileSourceDataset(data_source)
labels = data_source.labels
assert len(X) == len(labels)
# n_225 (defined elsewhere) is the number of files for speaker 225.
assert (labels[:n_225] == 225).all()
assert (labels[n_225:] == 228).all()
class MyWavFileDataSource(vctk.WavFileDataSource):
    """VCTK wav data source that extracts mel-cepstrum features.

    Audio must be sampled at 48 kHz. WORLD performs the F0/spectral
    analysis; SPTK converts the spectrogram to mel-cepstrum.
    """

    def __init__(self, data_root, speakers, labelmap=None):
        super(MyWavFileDataSource, self).__init__(data_root, speakers, labelmap)
        # All-pass constant chosen for the 48 kHz sampling rate.
        self.alpha = pysptk.util.mcepalpha(48000)

    def collect_features(self, path):
        """Read one wav file and return a float32 mel-cepstrum matrix."""
        fs, waveform = wavfile.read(path)
        assert fs == 48000
        waveform = waveform.astype(np.float64)
        # WORLD pipeline: coarse F0 (DIO) -> refined F0 (StoneMask) -> spectrogram.
        f0, timeaxis = pyworld.dio(waveform, fs, frame_period=5)
        f0 = pyworld.stonemask(waveform, f0, timeaxis, fs)
        spectrogram = pyworld.cheaptrick(waveform, f0, timeaxis, fs)
        spectrogram = trim_zeros_frames(spectrogram)
        # 24th-order mel-cepstrum (25 coefficients including c0).
        mel_cepstrum = pysptk.sp2mc(spectrogram, order=24, alpha=self.alpha)
        return mel_cepstrum.astype(np.float32)
# Build the feature extractor for a single speaker; DATA_DIR (defined
# elsewhere) must point at a VCTK corpus root.
data_source = MyWavFileDataSource(DATA_DIR, speakers=["225"])
def test_vctk_dummy():
    """Smoke test: VCTK exposes 108 speakers, and both data sources
    raise RuntimeError when pointed at a nonexistent corpus root."""
    assert len(vctk.available_speakers) == 108

    for source_class in (vctk.TranscriptionDataSource, vctk.WavFileDataSource):
        @raises(RuntimeError)
        def instantiate(klass):
            klass("dummy")
        instantiate(source_class)
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Preprocess every VCTK utterance in parallel.

    Returns the list of per-utterance results produced by
    ``_process_utterance`` (in submission order).
    """
    executor = ProcessPoolExecutor(max_workers=num_workers)
    speakers = vctk.available_speakers

    # Transcriptions and wav paths are collected over the same speakers,
    # so zipping pairs each text with its audio file and speaker label.
    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    wav_paths = vctk.WavFileDataSource(
        in_dir, speakers=speakers).collect_files()

    futures = [
        executor.submit(partial(_process_utterance,
                                out_dir, index + 1, speaker_id, wav_path, text))
        for index, (speaker_id, text, wav_path) in enumerate(
            zip(speaker_ids, transcriptions, wav_paths))
    ]
    return [future.result() for future in tqdm(futures)]
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Preprocess all VCTK utterances with a process pool.

    Args:
        in_dir: VCTK corpus root directory.
        out_dir: Directory forwarded to ``_process_utterance``.
        num_workers: Size of the process pool.
        tqdm: Optional progress-bar wrapper applied while gathering results.

    Returns:
        List of ``_process_utterance`` results, in submission order.
    """
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    speakers = vctk.available_speakers
    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    wav_paths = vctk.WavFileDataSource(
        in_dir, speakers=speakers).collect_files()
    # NOTE(review): assumes labels, transcriptions and wav paths are aligned
    # index-by-index — they come from data sources over the same speakers.
    for index, (speaker_id, text, wav_path) in enumerate(
            zip(speaker_ids, transcriptions, wav_paths)):
        futures.append(executor.submit(
            partial(_process_utterance, out_dir, index + 1, speaker_id, wav_path, text)))
    return [future.result() for future in tqdm(futures)]
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Preprocess VCTK utterances in parallel, skipping held-out speakers.

    Speakers named in ``hparams.not_for_train_speaker`` (a comma-separated
    string, e.g. ``"225, 228"``) are excluded from preprocessing.

    Args:
        in_dir: VCTK corpus root directory.
        out_dir: Directory forwarded to ``_process_utterance``.
        num_workers: Size of the process pool.
        tqdm: Optional progress-bar wrapper applied while gathering results.

    Returns:
        List of ``_process_utterance`` results for the kept utterances.
    """
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    speakers = vctk.available_speakers
    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    # Map speaker name -> numeric label.
    # NOTE(review): this relies on np.unique's sorted output lining up with
    # the order of ``speakers`` (i.e. labels increase with speaker order) —
    # TODO confirm against TranscriptionDataSource's label assignment.
    speaker_ids_unique = np.unique(speaker_ids)
    speaker_to_speaker_id = dict(zip(speakers, speaker_ids_unique))
    wav_paths = vctk.WavFileDataSource(
        in_dir, speakers=speakers).collect_files()
    # Parse the exclusion list robustly: the original split on ", " exactly,
    # which silently broke on "225,228" or trailing spaces. Splitting on ","
    # and stripping accepts both forms; empty entries are dropped.
    _ignore_speaker = [name.strip()
                       for name in hparams.not_for_train_speaker.split(",")
                       if name.strip()]
    ignore_speaker = [speaker_to_speaker_id[name] for name in _ignore_speaker]
    for index, (speaker_id, text, wav_path) in enumerate(
            zip(speaker_ids, transcriptions, wav_paths)):
        if speaker_id in ignore_speaker:
            continue
        futures.append(executor.submit(
            partial(_process_utterance, out_dir, index + 1, speaker_id, wav_path, text)))
    return [future.result() for future in tqdm(futures)]
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Run ``_process_utterance`` over the whole VCTK corpus in parallel
    and return the collected results in submission order."""
    pool = ProcessPoolExecutor(max_workers=num_workers)
    all_speakers = vctk.available_speakers

    transcription_source = vctk.TranscriptionDataSource(
        in_dir, speakers=all_speakers)
    texts = transcription_source.collect_files()
    labels = transcription_source.labels
    audio_paths = vctk.WavFileDataSource(
        in_dir, speakers=all_speakers).collect_files()

    pending = []
    index = 0
    # texts/labels/audio_paths come from sources over the same speakers,
    # so they align element-wise; utterance indices are 1-based.
    for label, text, audio_path in zip(labels, texts, audio_paths):
        index += 1
        job = partial(_process_utterance, out_dir, index, label, audio_path, text)
        pending.append(pool.submit(job))
    return [future.result() for future in tqdm(pending)]
# Parse CLI arguments and set logging verbosity from --log.
args = parser.parse_args()
log_level = args.log.upper()
logging.getLogger().setLevel(log_level)
# Filler words that the forced aligner treats specially.
disfluencies = set(['uh', 'um'])
data_root = args.data_root
# Do for all speakers
speakers = vctk.available_speakers
# Collect all transcripts/wav files
td = vctk.TranscriptionDataSource(data_root, speakers=speakers)
transcriptions = td.collect_files()
wav_paths = vctk.WavFileDataSource(
    data_root, speakers=speakers).collect_files()
# Save dir
save_dir = join(data_root, "lab")
if not exists(save_dir):
    os.makedirs(save_dir)
# Gentle forced-aligner resources (models etc.).
resources = gentle.Resources()
# For each utterance, derive the .lab output path by mirroring the wav
# path under lab/ (assumes wavs live under a "wav48/" directory).
for idx in tqdm(range(len(wav_paths))):
    transcript = transcriptions[idx]
    audiofile = wav_paths[idx]
    lab_path = audiofile.replace("wav48/", "lab/").replace(".wav", ".lab")
    print(transcript)
    print(audiofile)
    print(lab_path)
def collect_files(self):
    """Collect wav file paths via the parent collector.

    NOTE(review): passes ``True`` positionally to the base class's
    ``collect_files`` — presumably a flag selecting an alternate
    collection mode; confirm against the parent's signature.
    """
    return super(WavFileDataSource, self).collect_files(True)