How to use the torchaudio.save function in torchaudio

To help you get started, we've selected a few torchaudio.save examples based on popular ways the function is used in public projects.

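If you just need the basic call, here is a minimal, self-contained sketch before the project snippets. It assumes a recent torchaudio where save() takes a 2D [channels, num_frames] tensor; the filename is a placeholder.

import math

import torch
import torchaudio

sample_rate = 16000
# one second of a 440 Hz sine wave, values in [-1.0, 1.0]
t = torch.arange(sample_rate).float() / sample_rate
waveform = torch.sin(2 * math.pi * 440.0 * t)

# recent torchaudio expects [channels, num_frames]; releases before ~0.7
# (used by most snippets below) also accepted 1D or [num_frames, channels] input
torchaudio.save("sine.wav", waveform.unsqueeze(0), sample_rate)

loaded, sr = torchaudio.load("sine.wav")
assert sr == sample_rate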

github pytorch / audio / test / test_compliance_kaldi.py (View on GitHub)
def _create_data_set(self):
        # used to generate the dataset to test on; run offline, not during testing
        test_dirpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
        test_filepath = os.path.join(test_dirpath, 'assets', 'kaldi_file.wav')
        sr = 16000
        x = torch.arange(0, 20).float()
        # between [-6,6]
        y = torch.cos(2 * math.pi * x) + 3 * torch.sin(math.pi * x) + 2 * torch.cos(x)
        # between [-2^30, 2^30]
        y = (y / 6 * (1 << 30)).long()
        # clear the last 16 bits because they aren't used anyway
        y = ((y >> 16) << 16).float()
        torchaudio.save(test_filepath, y, sr)
        sound, sample_rate = torchaudio.load(test_filepath, normalization=False)
        print(y.long() >> 16)  # debug: inspect the 16-bit sample values
        self.assertTrue(sample_rate == sr)
        self.assertTrue(torch.allclose(y, sound))
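The bit-exact round trip above hinges on normalization=False, which returns the raw integer sample values as floats instead of scaling them into [-1.0, 1.0]. A sketch of the difference, assuming the pre-0.7 load() signature that still exposed normalization:

# assuming torchaudio <= 0.6, where load() took a `normalization` argument
raw, _ = torchaudio.load(test_filepath, normalization=False)  # integer-valued floats
scaled, _ = torchaudio.load(test_filepath)                    # scaled into [-1.0, 1.0]
# for 32-bit wav data, raw == scaled * (1 << 31) up to float rounding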
github dhpollack / fast-wavenet.pytorch / test / test_wavenet.py (View on GitHub)
def test4_wavenet_audio(self):
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            print("install matplotlib for plot of signals")
            plt = None

        num_samples = 1 << 15

        sig, sr = torchaudio.load("test/data/david.wav")
        # keep a multiple-of-3 length, then decimate by taking every 3rd sample
        # (the original sig[:-(sig.size(0)%3):3] returns an empty tensor when the
        # length is already divisible by 3)
        sig = sig[:sig.size(0) - sig.size(0) % 3:3]
        input = sig[16000:(16000+num_samples)].contiguous()
        # write sample for qualitative test
        torchaudio.save("test/data/david_16000hz_input_sample.wav", input, sr//3)
        input /= torch.abs(input).max()
        assert input.min() >= -1. and input.max() <= 1.
        input = input.view(1, 1, -1)
        labels = input.numpy()
        labels = mu_law_encoding(labels, 256)
        labels = torch.from_numpy(labels).squeeze().long()

        # build network and optimizer
        m = FastWaveNet(layers=10,
                        blocks=4, # number of blocks
                        residual_channels=16,
                        dilation_channels=32,
                        skip_channels=16,
                        quantization_channels=256,
                        input_len=num_samples,
                        audio_channels=1,
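The mu_law_encoding call above quantizes the normalized waveform into 256 classes for the network targets. A minimal round-trip sketch using torchaudio's transform classes, assuming the old MuLawExpanding name (renamed MuLawDecoding in later releases):

import torch
import torchaudio.transforms as T

channels = 256
encode = T.MuLawEncoding(channels)
decode = T.MuLawExpanding(channels)  # T.MuLawDecoding in newer torchaudio

x = torch.rand(1, 16000) * 2 - 1  # waveform normalized to [-1, 1]
labels = encode(x)                # integer class ids in [0, channels - 1]
x_hat = decode(labels)            # lossy reconstruction back in [-1, 1]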
github vsimkus / voice-conversion / eval_vqvae.py (View on GitHub)

#TODO: Upsampling?

# Check out path
out_path = os.path.expanduser(args.eval_out_path)
if not os.path.exists(out_path):
    os.mkdir(out_path)

out_filename = '{}_{}.wav'.format(os.path.basename(audio_path).split('.')[0], args.eval_speaker_id)
out_file_path = os.path.join(out_path, out_filename)

# Save as audio
torchaudio.save(filepath=out_file_path, src=out, sample_rate=sr)
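One small hardening note on the directory check above: os.mkdir fails when the parent directory is missing, and the exists-then-mkdir pair can race. A sketch of the more defensive variant:

import os

out_path = os.path.expanduser(args.eval_out_path)  # args as in the snippet above
os.makedirs(out_path, exist_ok=True)  # creates parents, tolerates an existing directory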
github yoyololicon / pytorch_FFTNet / FFTNet_generator.py (View on GitHub)

    print("Training time cost:", datetime.now().replace(microsecond=0) - a)

    print("Start to generate some noise...")
    net = net.cpu()
    net.eval()
    with torch.no_grad():
        a = datetime.now().replace(microsecond=0)
        generation = net.fast_generate(int(sr * generation_time), c=c)
        decoder = transforms.MuLawExpanding(channels)
        generation = decoder(generation)
        torchaudio.save(filename, generation, sr)
        print("Generation time cost:", datetime.now().replace(microsecond=0) - a)
github pytorch / audio / torchaudio / legacy.py (View on GitHub)
r"""Saves a Tensor with audio signal to disk as a standard format like mp3, wav, etc.
    The default options have changed as of torchaudio 0.2 and this function maintains
    option defaults from version 0.1.

    Args:
        filepath (str): Path to audio file
        src (torch.Tensor): An input 2D Tensor of shape `[L x C]` where L is
            the number of audio frames, C is the number of channels
        sample_rate (int): The sample-rate of the audio to be saved
        precision (int, optional): The bit-precision of the audio to be saved. (Default: ``32``)

    Example
        >>> data, sample_rate = torchaudio.legacy.load('foo.mp3')
        >>> torchaudio.legacy.save('foo.wav', data, sample_rate)
    """
    torchaudio.save(filepath, src, sample_rate, precision, False)  # False -> channels_first=False, i.e. the legacy [L x C] layout
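For reference, the forwarding call above should be equivalent to spelling out the 0.1-era defaults explicitly; a sketch assuming the 0.2-era torchaudio.save() keywords precision and channels_first:

# assuming torchaudio ~0.2-0.4, where save() accepted these keywords
torchaudio.save('foo.wav', data, sample_rate, precision=32, channels_first=False)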
github yoyololicon / pytorch_FFTNet / FFTNet_vocoder.py (View on GitHub)
            print(step, "{:.4f}".format(loss.item()))
            step += 1
            if step > steps:
                break

    print("Training time cost:", datetime.now().replace(microsecond=0) - a)

    print("Start to generate some noise...")
    net = net.cpu()
    net.eval()
    with torch.no_grad():
        a = datetime.now().replace(microsecond=0)
        generation = net.fast_generate(h=test_features, c=c)
        generation = dec(generation)
        torchaudio.save(filename, generation, sr)
        cost = datetime.now().replace(microsecond=0) - a
        print("Generation time cost:", cost, ". Speed:", generation.size(0)/cost.total_seconds(), "samples/sec.")
github JusperLee / Conv-TasNet / Conv_TasNet_Pytorch / AudioReader.py (View on GitHub)
def write_wav(fname, src, sample_rate):
    '''Write a wav file.

    Args:
        fname: output wav file path
        src: tensor of audio frames
        sample_rate (int): sample rate of the audio

    Returns:
        None
    '''
    torchaudio.save(fname, src, sample_rate)
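A hypothetical usage sketch for the helper above; the tensor is random stand-in data, and the path and sample rate are placeholders:

import torch

# stand-in for a separated source: 4 seconds of mono audio at 8 kHz
est_source = torch.randn(1, 8000 * 4).clamp(-1.0, 1.0)
write_wav('spk1_est.wav', est_source, 8000)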