def test_resample_size(self):
    input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
    waveform, sample_rate = torchaudio.load(input_path)
    upsample_rate = sample_rate * 2
    downsample_rate = sample_rate // 2

    invalid_resample = torchaudio.transforms.Resample(sample_rate, upsample_rate, resampling_method='foo')
    self.assertRaises(ValueError, invalid_resample, waveform)

    upsample_resample = torchaudio.transforms.Resample(
        sample_rate, upsample_rate, resampling_method='sinc_interpolation')
    up_sampled = upsample_resample(waveform)

    # we expect the upsampled signal to have twice as many samples
    self.assertTrue(up_sampled.size(-1) == waveform.size(-1) * 2)

    downsample_resample = torchaudio.transforms.Resample(
        sample_rate, downsample_rate, resampling_method='sinc_interpolation')
    down_sampled = downsample_resample(waveform)

    # we expect the downsampled signal to have half as many samples
    self.assertTrue(down_sampled.size(-1) == waveform.size(-1) // 2)
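The test above checks that resampling to double or half the source rate doubles or halves the number of samples. A minimal standalone sketch of the same length check, assuming a synthetic 1-second sine wave in place of the test asset:

import math
import torch
import torchaudio

sample_rate = 16000
t = torch.arange(sample_rate, dtype=torch.float32) / sample_rate
waveform = torch.sin(2 * math.pi * 440.0 * t).unsqueeze(0)  # shape: (1, 16000)

resample = torchaudio.transforms.Resample(
    sample_rate, sample_rate * 2, resampling_method='sinc_interpolation')
# upsampling by 2x should double the number of samples
assert resample(waveform).size(-1) == waveform.size(-1) * 2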
def test_vctk_transform_pipeline(self):
    test_filepath_vctk = os.path.join(self.test_dirpath, "assets/VCTK-Corpus/wav48/p224/", "p224_002.wav")
    wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk)

    # rate: resample to 16000 Hz
    sample = T.Resample(sr_vctk, 16000, resampling_method='sinc_interpolation')
    wf_vctk = sample(wf_vctk)
    # dither with noise shaping
    wf_vctk = F.dither(wf_vctk, noise_shaping=True)

    # build the equivalent sox effects chain for comparison
    E = torchaudio.sox_effects.SoxEffectsChain()
    E.set_input_file(test_filepath_vctk)
    E.append_effect_to_chain("gain", ["-h"])
    E.append_effect_to_chain("channels", [1])
    E.append_effect_to_chain("rate", [16000])
    E.append_effect_to_chain("gain", ["-rh"])
    E.append_effect_to_chain("dither", ["-s"])
    wf_vctk_sox = E.sox_build_flow_effects()[0]

    self.assertTrue(torch.allclose(wf_vctk, wf_vctk_sox, rtol=1e-03, atol=1e-03))
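The snippet above validates the resample-then-dither pipeline against a sox effects chain. A minimal sketch of just the Python-side pipeline, assuming any loaded waveform ('input.wav' is a hypothetical stand-in) and the same T/F aliases:

import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T

waveform, sample_rate = torchaudio.load('input.wav')  # hypothetical path
resampled = T.Resample(sample_rate, 16000, resampling_method='sinc_interpolation')(waveform)
dithered = F.dither(resampled, noise_shaping=True)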
def __init__(self, transforms=None, shiftRandom=True, validationAudioPath=None):
    self.transforms = transforms
    self.preview = validationAudioPath is not None
    self.shiftRandom = shiftRandom and not self.preview
    self.count = None
    animFPS = 29.97  # samSoar recorded with an iPad

    if self.preview:
        inputSpeechPath = validationAudioPath
    else:
        inputSpeechPath = os.path.join(ROOT_PATH, 'data', 'samSoar', 'samSoar.wav')

    self.waveform, self.sampleRate = torchaudio.load(inputSpeechPath)
    # normalize everything to 16 kHz before computing frame counts
    if self.sampleRate != 16000:
        self.waveform = torchaudio.transforms.Resample(self.sampleRate, 16000)(self.waveform)
        self.sampleRate = 16000
    self.count = int(animFPS * (self.waveform.size()[1] / self.sampleRate))

    self.LPC = lpc.LPCCoefficients(
        self.sampleRate,
        .032,
        .5,
        order=31  # 32 - 1
    )

    if os.path.exists(INPUT_VALUES_PRECALC_PATH):
        self.inputValues = torch.load(INPUT_VALUES_PRECALC_PATH)
    else:
        print('pre-calculating input values...')
        self.inputValues = torch.Tensor([])
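The tail of that constructor reuses precalculated input values from disk when available. A minimal sketch of the load-or-precompute cache pattern it relies on; CACHE_PATH and compute_fn are hypothetical stand-ins:

import os
import torch

CACHE_PATH = 'input_values.pt'  # hypothetical cache location

def load_or_compute(compute_fn):
    # reuse the cached tensor if present, otherwise compute and save it
    if os.path.exists(CACHE_PATH):
        return torch.load(CACHE_PATH)
    values = compute_fn()
    torch.save(values, CACHE_PATH)
    return values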
if not self.training:
    unmix_target = model.load_model(
        target=target,
        model_name=self.model_name,
        device=self.device
    )
    unmix_target.freeze()
else:
    unmix_target = self.target_models[j]

model_rate = unmix_target.sample_rate.item()
if rate != model_rate:
    # resample to the model sample rate if needed;
    # this makes sure we resample the input only once
    resampler = torchaudio.transforms.Resample(
        orig_freq=rate,
        new_freq=model_rate).to(self.device)
    # until torchaudio merges https://github.com/pytorch/audio/pull/277,
    # the resampling has to be done on the CPU
    audio = torch.cat(
        [resampler(audio[sample])[None, ...].cpu()
         for sample in range(nb_samples)],
        dim=0).to(self.device)
    rate = model_rate

# apply the current model to get the source spectrogram
Vj = unmix_target(audio)
if self.softmask:
    # only exponentiate the model output if we use softmask
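The workaround above resamples each batch item on the CPU and moves the result back to the model device. A minimal sketch of that pattern as a standalone helper, assuming a batch tensor of shape (nb_samples, channels, time); the function name is hypothetical:

import torch
import torchaudio

def resample_batch_on_cpu(audio, orig_freq, new_freq):
    # resample each item on the CPU, then move the batch back to the
    # tensor's original device
    resampler = torchaudio.transforms.Resample(orig_freq=orig_freq, new_freq=new_freq)
    resampled = torch.cat(
        [resampler(audio[i].cpu())[None, ...] for i in range(audio.size(0))],
        dim=0)
    return resampled.to(audio.device)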
class MFCC:
    forward = torchaudio.transforms.MFCC().forward

class MuLawEncoding:
    forward = torchaudio.transforms.MuLawEncoding().forward

class MuLawDecoding:
    forward = torchaudio.transforms.MuLawDecoding().forward

class Resample:
    # Resample isn't a script_method
    forward = torchaudio.transforms.Resample.forward

    def __init__(self, orig_freq=16000, new_freq=16000, resampling_method='sinc_interpolation'):
        super(Resample, self).__init__()
        self.orig_freq = orig_freq
        self.new_freq = new_freq
        self.resampling_method = resampling_method
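Unlike the other wrappers, Resample takes forward unbound from the class, so the wrapper's __init__ must supply the orig_freq, new_freq, and resampling_method attributes that forward reads from self. A minimal sketch of calling it, reusing the wrapper class above:

import torch

wrapper = Resample(orig_freq=16000, new_freq=8000)
waveform = torch.randn(1, 16000)
down_sampled = wrapper.forward(waveform)  # resolves to Resample.forward(wrapper, waveform)
# per the size test above, we expect half as many samples
assert down_sampled.size(-1) == waveform.size(-1) // 2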