How to use the librosa.output module in librosa

To help you get started, we've selected a few librosa.output examples based on popular ways it is used in public projects.

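Before the project examples, here is a minimal sketch of the call the snippets below all revolve around. Note that librosa.output was deprecated in librosa 0.7 and removed in 0.8.0, so on current versions the soundfile package is the usual replacement; the file paths in this sketch are placeholders.

import numpy as np
import librosa

# Placeholder input path; any audio file librosa can decode will do.
y, sr = librosa.load("input.wav", sr=22050)

try:
    # librosa < 0.8: write the float32 waveform directly.
    librosa.output.write_wav("output.wav", y.astype(np.float32), sr)
except AttributeError:
    # librosa >= 0.8 removed librosa.output; soundfile covers the same use case.
    import soundfile as sf
    sf.write("output.wav", y, sr)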

github Hiroshiba / realtime-yukarin / tests / test_all_stream.py
def _concat_and_save(_waves, _path: str):
    # Nested helper inside a test method; `self` is captured from the enclosing test case.
    wave = numpy.concatenate(_waves).astype(numpy.float32)
    librosa.output.write_wav(_path, wave, self.out_sampling_rate)
github haoxiangsnr / Wave-U-Net-for-Speech-Enhancement / enhancement.py
    # Zero-pad the mixture so its length is an exact multiple of sample_length.
    mixture = torch.cat([mixture, torch.zeros(1, 1, padded_length, device=device)], dim=-1)

    assert mixture.size(-1) % sample_length == 0 and mixture.dim() == 3
    mixture_chunks = list(torch.split(mixture, sample_length, dim=-1))

    enhanced_chunks = []
    for chunk in mixture_chunks:
        enhanced_chunks.append(model(chunk).detach().cpu())

    enhanced = torch.cat(enhanced_chunks, dim=-1)  # [1, 1, T]
    enhanced = enhanced if padded_length == 0 else enhanced[:, :, :-padded_length]

    enhanced = enhanced.reshape(-1).numpy()

    output_path = os.path.join(output_dir, f"{name}.wav")
    librosa.output.write_wav(output_path, enhanced, sr=16000)
github EdwinYam / J-Net / Evaluate.py
    # TrackLike is a small container object exposing .audio, .rate and .shape.
    track = TrackLike(audio, sr, audio.shape)

    sources_pred = predict(track, model_config, load_model) # Input track to prediction function, get source estimates

    # Save source estimates as audio files into the output directory
    input_folder, input_filename = os.path.split(input_path)
    if output_path is None:
        # By default, set it to the input_path folder
        output_path = input_folder
    if not os.path.exists(output_path):
        print("WARNING: Given output path " + output_path + " does not exist. Trying to create it...")
        os.makedirs(output_path)
    assert(os.path.exists(output_path))
    for source_name, source_audio in list(sources_pred.items()):
        librosa.output.write_wav(os.path.join(output_path, input_filename) + "_" + source_name + ".wav", source_audio, sr)
github dodiku / audio_noise_clustering / 02_spectral_clustering_col_by_col / spectral_col.py
spectragram2[r,c] = 0
    # if spectral_fit_predict_reversed[r] == 0:
    #     for c in range(0,columns2):
    #         spectragram2[r,c] = 0

directory = '02_spectral_clustering_col_by_col/result01/'
output_file = directory + 'output.wav'
plot_file = directory + 'spectral.png'

if not os.path.exists(directory):
    os.makedirs(directory)

# output = stft.ispectrogram(spectragram2)
# wavfile.write(output_file, fs, output)
output = librosa.core.istft(spectragram2)
librosa.output.write_wav(output_file, output, sr)
addTimestamp('result01')

plt.figure(1).set_size_inches(12,8)
plt.figure(1).subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=0.9, wspace=0.6, hspace=0.8)
plt.pcolormesh(librosa.amplitude_to_db(spectragram2), cmap="YlGnBu")
plt.ylabel('Frequency [Hz]')
plt.xlabel('Samples')
plt.savefig(plot_file, dpi=300)


print ('🥝  result01 is done.\n')

'''--------------------
# generating result02: remove only positive values
# --------------------'''
spectragram2 = np.copy(spectragram)
github CorentinJ / Real-Time-Voice-Cloning / vocoder / audio.py
def save_wav(x, path) :
    librosa.output.write_wav(path, x.astype(np.float32), sr=hp.sample_rate)
github dodiku / audio_noise_clustering / 02_spectral_clustering_col_by_col / spectral_col.py
if all_labels[r,c] == 0:
    if spectragram2[r,c] > 0:
        # spectragram_db[r,c] = 0
        spectragram2[r,c] = spectragram2[r,c] * 0.2

directory = '02_spectral_clustering_col_by_col/result03/'
output_file = directory + 'output.wav'
plot_file = directory + 'spectral.png'

if not os.path.exists(directory):
    os.makedirs(directory)

# output = stft.ispectrogram(spectragram2)
# wavfile.write(output_file, fs, output)
output = librosa.core.istft(spectragram2)
librosa.output.write_wav(output_file, output, sr)
addTimestamp('result03')

plt.figure(1).set_size_inches(12,8)
plt.figure(1).subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=0.9, wspace=0.6, hspace=0.8)
plt.pcolormesh(librosa.amplitude_to_db(spectragram2), cmap="YlGnBu")
plt.ylabel('Frequency [Hz]')
plt.xlabel('Samples')
plt.savefig(plot_file, dpi=300)
print ('🥝  result03 is done.\n')

'''--------------------
generating result04: reduce all
--------------------'''
spectragram2 = np.copy(spectragram)
rows2, columns2 = spectragram2.shape
github SConsul / audio-source-separation / code / post_processing.py
def reconstruct(phase, bass_mag, vocals_mag, drums_mag,others_mag,song_num,segment_num,destination_path):
	# Retrieve complex STFT
	vocals = np.squeeze(vocals_mag.detach().numpy() * phase, axis=(0, 1))
	#print(vocals.shape)
	bass = np.squeeze(bass_mag.detach().numpy() * phase, axis=(0,1))
	drums = np.squeeze(drums_mag.detach().numpy() * phase, axis=(0,1))
	others = np.squeeze(others_mag.detach().numpy() * phase, axis=(0,1))

	# Perform ISTFT
	vocals_audio = librosa.istft(vocals, win_length=1024, hop_length=256, window='hann', center=True)
	bass_audio = librosa.istft(bass, win_length=1024, hop_length=256, window='hann', center=True)
	drums_audio = librosa.istft(drums, win_length=1024, hop_length=256, window='hann', center=True)
	others_audio = librosa.istft(others, win_length=1024, hop_length=256, window='hann', center=True)

	# Save as wav files
	librosa.output.write_wav(os.path.join(destination_path,'vocals',str(song_num)+'_'+str(segment_num)+'.wav'), vocals_audio,sr=44100)
	librosa.output.write_wav(os.path.join(destination_path,'bass',str(song_num)+'_'+str(segment_num)+'.wav'), bass_audio, sr=44100)
	librosa.output.write_wav(os.path.join(destination_path,'drums',str(song_num)+'_'+str(segment_num)+'.wav'), drums_audio, sr=44100)
	librosa.output.write_wav(os.path.join(destination_path,'others',str(song_num)+'_'+str(segment_num)+'.wav'), others_audio, sr=44100)
	return
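The ISTFT calls above only invert cleanly because win_length, hop_length, window and center mirror the STFT that produced the magnitude and phase. A minimal round-trip sketch on a hypothetical one-second signal, using the same parameters, shows the pairing:

import numpy as np
import librosa

y = np.random.randn(44100).astype(np.float32)  # hypothetical 1 s of noise at 44.1 kHz
S = librosa.stft(y, n_fft=1024, hop_length=256, win_length=1024, window='hann', center=True)
y_rec = librosa.istft(S, hop_length=256, win_length=1024, window='hann', center=True)
# y_rec closely matches y (up to boundary effects) only when the parameters agree.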
github csteinmetz1 / MixCNN / pre_process.py
            y, sr = librosa.load(stem, sr=44100, mono=False)
            y_left  = y[0,:]
            y_right = y[1,:]

            for factor in [0.81]: #[0.81, 0.93, 1.07, 1.23]:
                subdir = "stretch_{}".format(factor)
                if not os.path.isdir(os.path.join(song, "augmented", subdir)):
                    os.makedirs(os.path.join(song, "augmented", subdir))
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=FutureWarning)
                    stretch_left = librosa.effects.time_stretch(y_left, factor)
                    stretch_right = librosa.effects.time_stretch(y_right, factor)
                stretch = np.stack((stretch_left, stretch_right), axis=0)
                #stretch = np.reshape(stretch, (stretch.shape[1], stretch.shape[0]))
                filename = "{}.wav".format(stem_class)
                librosa.output.write_wav(os.path.join(song, "augmented", subdir, filename), stretch, sr)
                sys.stdout.write(" Stretching by {: >4}     \r".format(factor))
                sys.stdout.flush()

            for semitones in [0.5]: #[-1, -0.5, 0.5, 1]:
                subdir = "shift_{}".format(semitones)
                if not os.path.isdir(os.path.join(song, "augmented", subdir)):
                    os.makedirs(os.path.join(song, "augmented", subdir))
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=FutureWarning)
                    shift_left = librosa.effects.pitch_shift(y_left, sr, n_steps=semitones)
                    shift_right = librosa.effects.pitch_shift(y_right, sr, n_steps=semitones)
                shift = np.stack((shift_left, shift_right), axis=0)
                #shift = np.reshape(shift, (shift.shape[1], shift.shape[0]))
                filename = "{}.wav".format(stem_class)
                librosa.output.write_wav(os.path.join(song, "augmented", subdir, filename), shift, sr)
                sys.stdout.write(" Shifting by {: >2}      \r".format(semitones))
github youssefsharief / arabic-tacotron-tts / util / audio.py
def save_wav(wav, path):
  wav *= 32767 / max(0.01, np.max(np.abs(wav)))
  librosa.output.write_wav(path, wav.astype(np.int16), hparams.sample_rate)
github ksw0306 / WaveVAE / train.py
def synthesize(model, ema=None):
    global global_step
    if ema is not None:
        model_ema = clone_as_averaged_model(model, ema)
    model_ema.eval()
    for batch_idx, (x, _, c, _) in enumerate(synth_loader):
        if batch_idx == 0:
            x, c = x.to(device), c.to(device)

            q_0 = Normal(x.new_zeros(x.size()), x.new_ones(x.size()))
            z = q_0.sample()
            wav_truth_name = '{}/{}/generate_{}_{}_truth.wav'.format(args.sample_path, args.model_name, global_step, batch_idx)
            librosa.output.write_wav(wav_truth_name, x.to(torch.device("cpu")).squeeze().numpy(), sr=22050)
            print('{} Saved!'.format(wav_truth_name))

            torch.cuda.synchronize()
            start_time = time.time()

            with torch.no_grad():
                if args.num_gpu == 1:
                    x_prior = model_ema.generate(z, c).squeeze()
                else:
                    x_prior = model_ema.module.generate(z, c).squeeze()
            torch.cuda.synchronize()
            print('{} seconds'.format(time.time() - start_time))
            wav = x_prior.to(torch.device("cpu")).data.numpy()
            wav_name = '{}/{}/generate_{}_{}.wav'.format(args.sample_path, args.model_name, global_step, batch_idx)
            librosa.output.write_wav(wav_name, wav, sr=22050)
            print('{} Saved!'.format(wav_name))