How to use the torchaudio.transforms.Spectrogram transform in torchaudio

To help you get started, we’ve selected a few torchaudio.transforms.Spectrogram examples based on popular ways it is used in public projects.

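Before the project examples, here is a minimal sketch of basic usage. The random waveform and the parameter values are illustrative stand-ins, not taken from any of the projects below; in practice you would load a real file with torchaudio.load.

import torch
import torchaudio

# Illustrative mono waveform; in practice, load one with torchaudio.load("file.wav").
waveform = torch.randn(1, 16000)  # (channel, time)

# power=2 yields a power spectrogram with n_fft // 2 + 1 frequency bins.
spectrogram = torchaudio.transforms.Spectrogram(n_fft=400, hop_length=200, power=2)
spec = spectrogram(waveform)  # (channel, 201, time_frames)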

From pytorch/audio (torchaudio/transforms.py):
def __init__(self, sample_rate=16000, n_fft=400, win_length=None, hop_length=None, f_min=0., f_max=None,
                 pad=0, n_mels=128, window_fn=torch.hann_window, wkwargs=None):
        super(MelSpectrogram, self).__init__()
        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.win_length = win_length if win_length is not None else n_fft
        self.hop_length = hop_length if hop_length is not None else self.win_length // 2
        self.pad = pad
        self.n_mels = n_mels  # number of mel frequency bins
        self.f_max = f_max
        self.f_min = f_min
        self.spectrogram = Spectrogram(n_fft=self.n_fft, win_length=self.win_length,
                                       hop_length=self.hop_length,
                                       pad=self.pad, window_fn=window_fn, power=2,
                                       normalized=False, wkwargs=wkwargs)
        self.mel_scale = MelScale(self.n_mels, self.sample_rate, self.f_min, self.f_max, self.n_fft // 2 + 1)
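With the defaults in this constructor, a minimal usage sketch might look like the following; the 16 kHz rate and the synthetic waveform are assumed values, not taken from the source above.

import torch
import torchaudio

waveform = torch.randn(1, 16000)  # stand-in for a real waveform at 16 kHz

# MelSpectrogram composes the Spectrogram and MelScale steps shown above.
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000, n_fft=400, hop_length=200, n_mels=128)
mel_spec = mel_transform(waveform)  # (channel, n_mels, time_frames)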
From pytorch/audio (test/test_transforms.py):
def _test_librosa_consistency_helper(n_fft, hop_length, power, n_mels, n_mfcc, sample_rate):
            input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
            sound, sample_rate = torchaudio.load(input_path)
            sound_librosa = sound.cpu().numpy().squeeze()  # (64000)

            # test core spectrogram
            spect_transform = torchaudio.transforms.Spectrogram(n_fft=n_fft, hop_length=hop_length, power=2)
            out_librosa, _ = librosa.core.spectrum._spectrogram(y=sound_librosa,
                                                                n_fft=n_fft,
                                                                hop_length=hop_length,
                                                                power=2)

            out_torch = spect_transform(sound).squeeze().cpu()
            self.assertTrue(torch.allclose(out_torch, torch.from_numpy(out_librosa), atol=1e-5))

            # test mel spectrogram
            melspect_transform = torchaudio.transforms.MelSpectrogram(
                sample_rate=sample_rate, window_fn=torch.hann_window,
                hop_length=hop_length, n_mels=n_mels, n_fft=n_fft)
            librosa_mel = librosa.feature.melspectrogram(y=sound_librosa, sr=sample_rate,
                                                         n_fft=n_fft, hop_length=hop_length, n_mels=n_mels,
                                                         htk=True, norm=None)
            librosa_mel_tensor = torch.from_numpy(librosa_mel)
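The excerpt is truncated here; presumably the test concludes by applying the torchaudio transform and comparing against librosa's tensor, along these lines (the tolerance is an assumption):

            # Hypothetical continuation: compare torchaudio's mel spectrogram
            # against librosa's, allowing for small numerical differences.
            out_torch_mel = melspect_transform(sound).squeeze().cpu()
            self.assertTrue(torch.allclose(out_torch_mel.type(librosa_mel_tensor.dtype),
                                           librosa_mel_tensor, atol=5e-3))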
From pytorch/audio (test/test_transforms.py):
def test_batch_spectrogram(self):
        waveform, sample_rate = torchaudio.load(self.test_filepath)

        # Single then transform then batch
        expected = transforms.Spectrogram()(waveform).repeat(3, 1, 1, 1)

        # Batch then transform
        computed = transforms.Spectrogram()(waveform.repeat(3, 1, 1))

        self.assertTrue(computed.shape == expected.shape, (computed.shape, expected.shape))
        self.assertTrue(torch.allclose(computed, expected))
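This test passes because Spectrogram operates over the last (time) dimension and treats any leading dimensions as batch dimensions. A standalone sketch of the same property, with synthetic data:

import torch
import torchaudio.transforms as transforms

wave = torch.randn(1, 8000)              # (channel, time)
single = transforms.Spectrogram()(wave)  # (channel, freq, time)
batched = transforms.Spectrogram()(wave.repeat(3, 1, 1))  # (batch, channel, freq, time)
assert torch.allclose(batched[0], single)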
From pytorch/audio (test/test_jit.py):
def test_scriptmodule_Spectrogram(self):
        tensor = torch.rand((1, 1000), device="cuda")

        self._test_script_module(tensor, transforms.Spectrogram)
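Because the transforms are torch.nn.Module subclasses, they can also be scripted directly; a minimal sketch on CPU:

import torch
import torchaudio

# Script the transform and run it on a random signal.
scripted = torch.jit.script(torchaudio.transforms.Spectrogram())
out = scripted(torch.rand(1, 1000))  # (channel, freq, time_frames)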
From Roboy/sonosco (sonosco/loader.py):
...

        :param audio_conf: Dictionary containing the sample rate, window and the window length/stride in seconds
        :param labels: String containing all the possible characters to map to
        :param normalize: Apply standard mean and deviation normalization to audio tensor
        :param augment: Apply random tempo and gain perturbations (default: False)
        """
        self.labels_map = {label: i for i, label in enumerate(labels)}
        self.window_stride = audio_conf["window_stride"]
        self.window_size = audio_conf["window_size"]
        self.sample_rate = audio_conf["sample_rate"]
        self.window = windows_legacy.get(audio_conf["window"], windows_legacy["hamming"]) if legacy else windows.get(audio_conf["window"], windows["hamming"])
        self.normalize = normalize
        self.augment = augment
        self.legacy = legacy
        self.transform = torchaudio.transforms.Spectrogram(n_fft=int(self.sample_rate * self.window_size),
                                                           hop_length=int(self.sample_rate * self.window_stride),
                                                           window_fn=self.window, normalized=self.normalize)
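Note how the constructor converts the window length and stride from seconds to samples before passing them to Spectrogram. For example (the 16 kHz rate and 20 ms/10 ms settings are illustrative, not from the source):

sample_rate = 16000
window_size = 0.02    # seconds
window_stride = 0.01  # seconds
n_fft = int(sample_rate * window_size)         # 320 samples per window
hop_length = int(sample_rate * window_stride)  # 160 samples between frames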
From pytorch/audio (torchaudio/_docs.py):
from __future__ import absolute_import, division, print_function, unicode_literals
import torchaudio


# TODO See https://github.com/pytorch/audio/issues/165
class Spectrogram:
    forward = torchaudio.transforms.Spectrogram().forward


class AmplitudeToDB:
    forward = torchaudio.transforms.AmplitudeToDB().forward


class MelScale:
    forward = torchaudio.transforms.MelScale().forward


class MelSpectrogram:
    forward = torchaudio.transforms.MelSpectrogram().forward


class MFCC:
    forward = torchaudio.transforms.MFCC().forward
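Since each of these transforms is a torch.nn.Module, they compose naturally. A sketch (parameter values assumed) that chains Spectrogram with AmplitudeToDB to produce a log-scaled spectrogram:

import torch
import torchaudio

pipeline = torch.nn.Sequential(
    torchaudio.transforms.Spectrogram(n_fft=400, power=2),
    torchaudio.transforms.AmplitudeToDB(stype="power"),
)
db_spec = pipeline(torch.randn(1, 16000))  # power spectrogram in decibels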
From Xilinx/brevitas (brevitas_examples/speech_to_text/quartznet/audio_preprocessing.py):
self.win_length = n_window_size
        self.hop_length = n_window_stride

        self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))

        # Set window_fn. None defaults to torch.ones.
        window_fn = self.torch_windows.get(window, None)
        if window_fn is None:
            raise ValueError(
                f"Window argument for AudioProcessor is invalid: {window}. "
                f"For no window function, use 'ones' or None.")

        # Create featurizer.
        # Calls torch.stft under the hood, and is hard-coded to use center=True
        self.featurizer = torchaudio.transforms.Spectrogram(
            n_fft=self.n_fft,
            win_length=self.win_length,
            hop_length=self.hop_length,
            window_fn=window_fn,
            normalized=normalized
        )
        self.featurizer.to(self._device)
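Two details here are worth noting: n_fft is rounded up to the next power of two for an efficient FFT size, and the transform, being an nn.Module, moves to a device like any other layer. A standalone sketch with assumed values:

import math
import torch
import torchaudio

win_length = 320  # illustrative 20 ms window at 16 kHz
n_fft = 2 ** math.ceil(math.log2(win_length))  # rounds 320 up to 512

featurizer = torchaudio.transforms.Spectrogram(
    n_fft=n_fft, win_length=win_length, hop_length=160,
    window_fn=torch.hann_window, normalized=False)
featurizer = featurizer.to("cuda" if torch.cuda.is_available() else "cpu")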