Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_linguistic_features_for_acoustic_model():
    """Check frame-level linguistic features against a stored reference.

    Loads the question set and a state-aligned label file, computes
    linguistic features with frame features and ``"full"`` sub-phone
    features, and compares the result with the float32 reference file
    stored under ``binary_label_425``.
    """
    question_path = join(DATA_DIR, "questions-radio_dnn_416.hed")
    binary_dict, continuous_dict = hts.load_question_set(question_path)

    # Linguistic features
    # Training an acoustic model paired with linguistic features needs a
    # frame-level linguistic feature representation.
    state_label_path = join(DATA_DIR, "label_state_align", "arctic_a0001.lab")
    labels = hts.load(state_label_path)
    assert labels.is_state_alignment_label()

    features = fe.linguistic_features(
        labels,
        binary_dict,
        continuous_dict,
        add_frame_features=True,
        subphone_features="full",
    )

    # The reference is a flat float32 dump; give it the same number of
    # columns as the computed features before comparing.
    reference_path = join(DATA_DIR, "binary_label_425", "arctic_a0001.lab")
    reference = np.fromfile(reference_path, dtype=np.float32)
    reference = reference.reshape(-1, features.shape[-1])
    assert np.allclose(features, reference)
def test_hts_labels_contains_multiple_whitespaces():
    """Load ``p225_001.lab`` and print the parsed labels.

    There is no explicit assertion: the test only fails if loading or
    printing the label file raises.
    """
    # NOTE(review): judging by the test name, this file presumably separates
    # fields with multiple whitespace characters -- confirm against the data.
    labels = hts.load(join(DATA_DIR, "p225_001.lab"))
    print(labels)
def _process_utterance(out_dir, index, wav_path, text):
    """Load one utterance, optionally rescale it, and trim silence.

    The waveform is loaded from ``wav_path`` and, when
    ``hparams.rescaling`` is set, scaled so its peak magnitude equals
    ``hparams.rescaling_max``. Silence is cut using the matching ``.lab``
    file if one exists, otherwise with ``librosa.effects.trim``. For
    mu-law quantized input types the waveform is trimmed once more using
    the quantized signal and ``hparams.silence_threshold``.
    """
    # Load the audio to a numpy array:
    wav = audio.load_wav(wav_path)
    sr = hparams.sample_rate

    if hparams.rescaling:
        peak = np.abs(wav).max()
        wav = wav / peak * hparams.rescaling_max

    # Trim silence from hts labels if available
    lab_path = wav_path.replace("wav/", "lab/").replace(".wav", ".lab")
    if not exists(lab_path):
        # No alignment available: fall back to energy-based trimming.
        wav, _ = librosa.effects.trim(wav, top_db=30)
    else:
        labels = hts.load(lab_path)
        first_label, last_label = labels[0], labels[-1]
        # The utterance is expected to start and end with a "sil" label.
        assert "sil" in first_label[-1]
        assert "sil" in last_label[-1]
        # NOTE(review): presumably entries are (start, end, context) with
        # times in 100 ns units, hence the 1e-7 factor -- confirm.
        begin_sample = int(first_label[1] * 1e-7 * sr)
        end_sample = int(last_label[0] * 1e-7 * sr)
        wav = wav[begin_sample:end_sample]

    # Mu-law quantize
    if is_mulaw_quantize(hparams.input_type):
        # [0, quantize_channels)
        out = P.mulaw_quantize(wav, hparams.quantize_channels)

        # Trim silences
        start, end = audio.start_and_end_indices(
            out, hparams.silence_threshold)
        wav = wav[start:end]
def _process_utterance(out_dir, index, speaker_id, wav_path, text):
    """Load one utterance, trim leading/trailing silence, and rescale it.

    The waveform is loaded from ``wav_path`` and trimmed with
    ``librosa.effects.trim(top_db=20)``. When ``hparams.rescaling`` is set
    it is then scaled so its peak magnitude equals
    ``hparams.rescaling_max``. Label-based trimming is present but
    currently disabled (see the TODO below).
    """
    fs = hparams.sample_rate

    # Load the audio to a numpy array. Resampled if needed
    wav = audio.load_wav(wav_path)

    lab_path = wav_path.replace("wav/", "lab/").replace(".wav", ".lab")

    # Trim silence from hts labels if available
    # TODO: this branch is switched off by the constant ``False``.
    if exists(lab_path) and False:
        labels = hts.load(lab_path)
        # Convert label times to sample indices with ``fs``, the sample
        # rate bound above, so the branch is usable once it is enabled.
        # NOTE(review): the 1e-7 factor presumably converts 100 ns label
        # units to seconds -- confirm.
        b = int(start_at(labels) * 1e-7 * fs)
        e = int(end_at(labels) * 1e-7 * fs)
        wav = wav[b:e]
    # The energy-based trim is applied whether or not labels were used.
    wav, _ = librosa.effects.trim(wav, top_db=20)

    if hparams.rescaling:
        wav = wav / np.abs(wav).max() * hparams.rescaling_max

    out = wav
    constant_values = 0.0
    out_dtype = np.float32
    p_vt = 5
    p_gl = 5
def _process_utterance(out_dir, index, speaker_id, wav_path, text,
                       silence_threshold, fft_size):
    """Load one utterance, trim silence, and mu-law quantize it.

    The waveform is trimmed with ``librosa.effects.trim(top_db=20)``,
    quantized with ``P.mulaw_quantize``, and both the quantized signal and
    the waveform are cut to the range returned by
    ``audio.start_and_end_indices`` for ``silence_threshold``.
    Label-based trimming is present but currently disabled.
    """
    sr = hparams.sample_rate

    # Load the audio to a numpy array. Resampled if needed
    wav = audio.load_wav(wav_path)

    lab_path = wav_path.replace("wav/", "lab/").replace(".wav", ".lab")

    # Trim silence from hts labels if available
    # TODO: disabled by the constant ``False`` in the condition.
    if exists(lab_path) and False:
        labels = hts.load(lab_path)
        # NOTE(review): the 1e-7 factor presumably converts 100 ns label
        # units to seconds -- confirm.
        begin_sample = int(start_at(labels) * 1e-7 * sr)
        end_sample = int(end_at(labels) * 1e-7 * sr)
        wav = wav[begin_sample:end_sample]
    # Both paths finish with the same energy-based trim.
    wav, _ = librosa.effects.trim(wav, top_db=20)

    # Mu-law quantize
    quantized = P.mulaw_quantize(wav)

    # Trim silences
    start, end = audio.start_and_end_indices(quantized, silence_threshold)
    quantized, wav = quantized[start:end], wav[start:end]

    # Compute a mel-scale spectrogram from the trimmed wav:
def _process_utterance(out_dir, index, speaker_id, wav_path, text):
    """Load one utterance, trim silence, and compute its spectrograms.

    If a ``.lab`` file matching ``wav_path`` exists, the waveform is first
    cut to the label-derived range and then trimmed with ``top_db=25``;
    otherwise it is only trimmed, with ``top_db=15``. Linear and mel
    spectrograms are then computed as float32 arrays.
    """
    sr = hparams.sample_rate

    # Load the audio to a numpy array:
    wav = dv3.audio.load_wav(wav_path)

    lab_path = wav_path.replace("wav48/", "lab/").replace(".wav", ".lab")

    # Trim silence from hts labels if available
    if exists(lab_path):
        labels = hts.load(lab_path)
        # NOTE(review): the 1e-7 factor presumably converts 100 ns label
        # units to seconds -- confirm.
        begin_sample = int(start_at(labels) * 1e-7 * sr)
        end_sample = int(end_at(labels) * 1e-7 * sr)
        wav = wav[begin_sample:end_sample]
        trim_db = 25
    else:
        trim_db = 15
    # The threshold differs depending on whether labels were applied.
    wav, _ = librosa.effects.trim(wav, top_db=trim_db)

    # Compute the linear-scale spectrogram from the wav:
    linear = dv3.audio.spectrogram(wav)
    spectrogram = linear.astype(np.float32)
    n_frames = spectrogram.shape[1]

    # Compute a mel-scale spectrogram from the wav:
    mel = dv3.audio.melspectrogram(wav)
    mel_spectrogram = mel.astype(np.float32)

    # Write the spectrograms to disk:
    spectrogram_filename = 'vctk-spec-%05d.npy' % index
# uv = (lf0 != 0).astype(np.float32)
# continuous lf0 or not
lf0 = interp1d(lf0, kind='slinear')
# order 59
# mgc dim 60*3
# mgc = apply_delta_windows(mgc, hparams.windows)
# # lf0 dim 1*3
# lf0 = apply_delta_windows(lf0, hparams.windows)
features = np.hstack((mgc, lf0))
# cut silence frames by hts alignment
if label_path is not None:
labels = hts.load(label_path)
features = features[:labels.num_frames()]
indices = labels.silence_frame_indices()
features = np.delete(features, indices, axis=0)
return features.astype(np.float32)