def __init__(self, sample_rate=16000, n_fft=400, win_length=None, hop_length=None, f_min=0., f_max=None,
             pad=0, n_mels=128, window_fn=torch.hann_window, wkwargs=None):
    super(MelSpectrogram, self).__init__()
    self.sample_rate = sample_rate
    self.n_fft = n_fft
    self.win_length = win_length if win_length is not None else n_fft
    self.hop_length = hop_length if hop_length is not None else self.win_length // 2
    self.pad = pad
    self.n_mels = n_mels  # number of mel frequency bins
    self.f_max = f_max
    self.f_min = f_min
    # A power spectrogram followed by a projection onto the mel scale
    self.spectrogram = Spectrogram(n_fft=self.n_fft, win_length=self.win_length,
                                   hop_length=self.hop_length,
                                   pad=self.pad, window_fn=window_fn, power=2,
                                   normalized=False, wkwargs=wkwargs)
    self.mel_scale = MelScale(self.n_mels, self.sample_rate, self.f_min, self.f_max, self.n_fft // 2 + 1)
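For reference, a minimal usage sketch of the transform defined above; the 16 kHz rate, dummy input, and printed shape are illustrative assumptions, not part of the snippet:

import torch
import torchaudio

# MelSpectrogram chains a power Spectrogram with a MelScale projection,
# exactly as the __init__ above constructs them.
waveform = torch.randn(1, 16000)  # (channel, time) dummy 1-second signal
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000, n_fft=400, n_mels=128)
mel = mel_transform(waveform)
print(mel.shape)  # (channel, n_mels, time) -> torch.Size([1, 128, 81])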
def _test_librosa_consistency_helper(n_fft, hop_length, power, n_mels, n_mfcc, sample_rate):
    # Nested helper: `self` is captured from the enclosing test method.
    input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
    sound, sample_rate = torchaudio.load(input_path)
    sound_librosa = sound.cpu().numpy().squeeze()  # shape: (64000,)

    # test core spectrogram
    spect_transform = torchaudio.transforms.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=2)
    out_librosa, _ = librosa.core.spectrum._spectrogram(y=sound_librosa,
                                                        n_fft=n_fft,
                                                        hop_length=hop_length,
                                                        power=2)
    out_torch = spect_transform(sound).squeeze().cpu()
    self.assertTrue(torch.allclose(out_torch, torch.from_numpy(out_librosa), atol=1e-5))

    # test mel spectrogram
    melspect_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=sample_rate, window_fn=torch.hann_window,
        hop_length=hop_length, n_mels=n_mels, n_fft=n_fft)
    librosa_mel = librosa.feature.melspectrogram(y=sound_librosa, sr=sample_rate,
                                                 n_fft=n_fft, hop_length=hop_length, n_mels=n_mels,
                                                 htk=True, norm=None)
    librosa_mel_tensor = torch.from_numpy(librosa_mel)
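The snippet above truncates before the mel comparison; a plausible continuation, mirroring the spectrogram assertion earlier in the helper (the squeeze and tolerance here are assumptions, not from the original test):

# Hypothetical continuation of the helper above; atol is an assumption.
out_torch_mel = melspect_transform(sound).squeeze().cpu()
self.assertTrue(torch.allclose(out_torch_mel, librosa_mel_tensor, atol=5e-3))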
def test_batch_spectrogram(self):
    waveform, sample_rate = torchaudio.load(self.test_filepath)

    # Transform the single waveform, then repeat to form a batch
    expected = transforms.Spectrogram()(waveform).repeat(3, 1, 1, 1)

    # Batch the waveform first, then transform
    computed = transforms.Spectrogram()(waveform.repeat(3, 1, 1))

    self.assertTrue(computed.shape == expected.shape, (computed.shape, expected.shape))
    self.assertTrue(torch.allclose(computed, expected))
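What the test exercises: Spectrogram treats every leading dimension as a batch dimension, so transforming a stacked batch matches stacking individually transformed signals. A standalone sketch (the dummy input and printed shape are assumptions):

import torch
import torchaudio

transform = torchaudio.transforms.Spectrogram()  # n_fft defaults to 400
batch = torch.randn(3, 1, 16000)                 # (batch, channel, time)
out = transform(batch)
print(out.shape)  # (batch, channel, freq, time) -> torch.Size([3, 1, 201, 81])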
def test_scriptmodule_Spectrogram(self):
    tensor = torch.rand((1, 1000), device="cuda")
    self._test_script_module(tensor, transforms.Spectrogram)
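A sketch of what _test_script_module presumably verifies: the transform compiles with TorchScript and agrees with eager execution. CPU is used here for portability; the helper itself is not shown above:

import torch
import torchaudio.transforms as transforms

tensor = torch.rand(1, 1000)
scripted = torch.jit.script(transforms.Spectrogram())
assert torch.allclose(scripted(tensor), transforms.Spectrogram()(tensor))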
...
:param audio_conf: Dictionary containing the sample rate, window type, and the window length/stride in seconds
:param labels: String containing all the possible characters to map to
:param normalize: Apply mean/standard-deviation normalization to the audio tensor
:param augment: (default False) Apply random tempo and gain perturbations
"""
self.labels_map = {label: i for i, label in enumerate(labels)}
self.window_stride = audio_conf["window_stride"]
self.window_size = audio_conf["window_size"]
self.sample_rate = audio_conf["sample_rate"]
self.window = (windows_legacy.get(audio_conf["window"], windows_legacy["hamming"]) if legacy
               else windows.get(audio_conf["window"], windows["hamming"]))
self.normalize = normalize
self.augment = augment
self.legacy = legacy
# Window length/stride are given in seconds, so convert them to samples.
# Note: torchaudio's Spectrogram takes `hop_length`, `window_fn`, and
# `normalized`; the original snippet used `hop`, `window`, and `normalize`,
# which are not torchaudio keyword arguments.
self.transform = torchaudio.transforms.Spectrogram(n_fft=int(self.sample_rate * self.window_size),
                                                   hop_length=int(self.sample_rate * self.window_stride),
                                                   window_fn=self.window,
                                                   normalized=self.normalize)
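The parser's main move is converting window length and stride from seconds to samples before building the transform. A standalone sketch with assumed values (16 kHz audio, a 20 ms window, a 10 ms stride):

import torchaudio

sample_rate = 16000
window_size, window_stride = 0.02, 0.01  # seconds
transform = torchaudio.transforms.Spectrogram(
    n_fft=int(sample_rate * window_size),         # 320-sample window
    hop_length=int(sample_rate * window_stride))  # 160-sample hop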
from __future__ import absolute_import, division, print_function, unicode_literals

import torchaudio

# TODO See https://github.com/pytorch/audio/issues/165
class Spectrogram:
    forward = torchaudio.transforms.Spectrogram().forward

class AmplitudeToDB:
    forward = torchaudio.transforms.AmplitudeToDB().forward

class MelScale:
    forward = torchaudio.transforms.MelScale().forward

class MelSpectrogram:
    forward = torchaudio.transforms.MelSpectrogram().forward

class MFCC:
    forward = torchaudio.transforms.MFCC().forward
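These wrappers pre-build one transform instance per class and expose its bound forward method, so callers can invoke them without constructing anything. A usage sketch (the dummy input is an assumption):

import torch

waveform = torch.randn(1, 16000)
spec = Spectrogram.forward(waveform)  # forward is already bound to a prebuilt instance
db = AmplitudeToDB.forward(spec)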
self.win_length = n_window_size
self.hop_length = n_window_stride
# Round the FFT size up to the next power of two if none was given.
self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))

# Set window_fn. None defaults to torch.ones.
window_fn = self.torch_windows.get(window, None)
if window_fn is None:
    raise ValueError(
        f"Window argument for AudioProcessor is invalid: {window}. "
        f"For no window function, use 'ones' or None.")

# Create featurizer.
# Calls torch.stft under the hood, and is hard-coded to use center=True
self.featurizer = torchaudio.transforms.Spectrogram(
    n_fft=self.n_fft,
    win_length=self.win_length,
    hop_length=self.hop_length,
    window_fn=window_fn,
    normalized=normalized
)
self.featurizer.to(self._device)
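A sketch of applying the featurizer once it is built, written as a continuation of the class above; the dummy input and its length are assumptions:

# Hypothetical continuation: featurize one dummy utterance on the processor's device.
waveform = torch.randn(1, 16000).to(self._device)
spec = self.featurizer(waveform)  # (channel, n_fft // 2 + 1, time)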