How to use the librosa.util module in librosa

To help you get started, we’ve selected a few librosa.util examples, based on popular ways it is used in public projects.
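Most librosa.util helpers operate on plain NumPy arrays, so they are easy to try in isolation. As a quick orientation, here is a minimal sketch of one of the most common calls, fix_length (the input signal is synthetic):

import numpy as np
import librosa

sr = 22050
y = np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)  # 1 s of 440 Hz

# Zero-pad (or trim) to an exact number of samples along the last axis.
y_fixed = librosa.util.fix_length(y, size=2 * sr)
print(y_fixed.shape)  # (44100,)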

github interactiveaudiolab / nussl / nussl / deep / datasets / base_dataset.py
def mask_mixture(mask, mix):
    # Defined inside a dataset method: `self.n_fft` and `self.hop_length`
    # come from the enclosing object via closure.
    n = len(mix)
    # Pad so that the centered STFT can be inverted to exactly n samples.
    mix = librosa.util.fix_length(mix, n + self.n_fft // 2)
    mix_stft = librosa.stft(
        mix,
        n_fft=self.n_fft,
        hop_length=self.hop_length
    )
    # Apply the time-frequency mask, then invert back to a time-domain
    # source of the original length.
    masked_mix = mix_stft * mask
    source = librosa.istft(
        masked_mix,
        hop_length=self.hop_length,
        length=n
    )
    return source
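The fix_length call above follows the invertible-STFT pattern from the librosa documentation: pad the input by n_fft // 2 before stft, then pass length=n to istft to recover exactly the original number of samples. The same round trip, stripped of the masking (signal and parameters here are illustrative):

import numpy as np
import librosa

n_fft, hop_length = 2048, 512
y = np.random.randn(22050).astype(np.float32)  # stand-in for a real mixture
n = len(y)

y_pad = librosa.util.fix_length(y, size=n + n_fft // 2)
D = librosa.stft(y_pad, n_fft=n_fft, hop_length=hop_length)
y_out = librosa.istft(D, hop_length=hop_length, length=n)
assert len(y_out) == n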
github HENDRIX-ZT2 / pyaudiorestoration / dropouts_gui.py
def process_max_mono(self, fft_size, hop):
	for file_name in self.file_names:
		file_path = self.names_to_full_paths[file_name]
		signal, sr, channels = io_ops.read_file(file_path)
		if channels != 2:
			print("expects stereo input")
			continue

		n = len(signal)
		# pad the stereo signal along the sample axis
		y_pad = librosa.util.fix_length(signal, n + fft_size // 2, axis=0)
		# take the STFT of each channel
		D_L = librosa.stft(y_pad[:, 0], n_fft=fft_size, hop_length=hop)
		D_R = librosa.stft(y_pad[:, 1], n_fft=fft_size, hop_length=hop)

		# per time-frequency bin, keep the channel with the larger magnitude
		D_out = np.where(np.abs(D_L) > np.abs(D_R), D_L, D_R)
		# invert back to the time domain at the original length
		y_out = librosa.istft(D_out, length=n, hop_length=hop)

		# write the result back as a mono file
		io_ops.write_file(file_path, y_out, sr, 1)
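Note the axis=0 argument above: io_ops.read_file evidently returns an interleaved (n_samples, 2) array, so the padding has to run along the sample axis rather than the default last axis. In isolation (shapes are illustrative):

import numpy as np
import librosa

stereo = np.random.randn(1000, 2).astype(np.float32)  # (samples, channels)
padded = librosa.util.fix_length(stereo, size=1024, axis=0)
print(padded.shape)  # (1024, 2)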
github librosa / librosa / librosa / core / spectrum.py
        ytmp = ifft_window * fft.irfft(stft_matrix[:, bl_s:bl_t], axis=0)

        # Overlap-add the istft block starting at the i'th frame
        __overlap_add(y[frame * hop_length:], ytmp, hop_length)

        frame += (bl_t - bl_s)

    # Normalize by sum of squared window
    ifft_window_sum = window_sumsquare(window,
                                       n_frames,
                                       win_length=win_length,
                                       n_fft=n_fft,
                                       hop_length=hop_length,
                                       dtype=dtype)

    approx_nonzero_indices = ifft_window_sum > util.tiny(ifft_window_sum)
    y[approx_nonzero_indices] /= ifft_window_sum[approx_nonzero_indices]

    if length is None:
        # If we don't need to control length, just do the usual center trimming
        # to eliminate padded data
        if center:
            y = y[int(n_fft // 2):-int(n_fft // 2)]
    else:
        if center:
            # If we're centering, crop off the first n_fft//2 samples
            # and then trim/pad to the target length.
            # We don't trim the end here, so that if the signal is zero-padded
            # to a longer duration, the decay is smooth by windowing
            start = int(n_fft // 2)
        else:
            # If we're not centering, start at 0 and trim/pad as necessary
            start = 0

        y = util.fix_length(y[start:], length)

    return y
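The util.tiny call above supplies the threshold for "effectively zero" window weights: it returns the smallest positive normal number representable in the array's dtype, so the normalization only divides bins where the window envelope is meaningfully nonzero. A minimal illustration:

import numpy as np
import librosa

w = np.array([0.0, 1e-40, 0.5], dtype=np.float32)
eps = librosa.util.tiny(w)   # ~1.18e-38 for float32
print(w > eps)               # [False False  True]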
github bmcfee / crema / tests / test_chord.py
def AUDIOFILE():
    return librosa.util.example_audio_file()
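example_audio_file returns the path of the audio clip bundled with librosa 0.7 and earlier; it was deprecated in librosa 0.8 (and removed in later versions) in favor of the downloadable librosa.example data. A version-dependent sketch:

import librosa

# librosa <= 0.7
# path = librosa.util.example_audio_file()

# librosa >= 0.8 (fetches the named example on first use)
path = librosa.example('trumpet')
y, sr = librosa.load(path)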
github philipperemy / tensorflow-ctc-speech-recognition / audio_reader.py
'target': target_text,
                           FILENAME: filename}
                    cache_filename = filename.split('/')[-1].split('.')[0] + '_cache'
                    tmp_filename = os.path.join(cache_dir, cache_filename) + '.pkl'
                    with open(tmp_filename, 'wb') as f:
                        dill.dump(obj, f)
                        print('[DUMP AUDIO] {}'.format(tmp_filename))
                    if speaker_id not in self.metadata:
                        self.metadata[speaker_id] = {}
                    sentence_id = extract_sentence_id(filename)
                    if sentence_id not in self.metadata[speaker_id]:
                        self.metadata[speaker_id][sentence_id] = []
                    self.metadata[speaker_id][sentence_id] = {SPEAKER_ID: speaker_id,
                                                              SENTENCE_ID: sentence_id,
                                                              FILENAME: filename}
                except librosa.util.exceptions.ParameterError as e:
                    print(e)
                    print('[DUMP AUDIO ERROR SKIPPING FILENAME] {}'.format(filename))
            with open(os.path.join(cache_dir, 'metadata.pkl'), 'wb') as f:
                dill.dump(self.metadata, f)

        print('Using the generated files at {} to load the cache. '
              'Be sure to have enough memory.'.format(cache_dir))

        with open(os.path.join(cache_dir, 'metadata.pkl'), 'rb') as f:
            self.metadata = dill.load(f)

        pickle_files = find_files(cache_dir, pattern='*.pkl')
        for pkl_file in pickle_files:
            if 'metadata' not in pkl_file:
                with open(pkl_file, 'rb') as f:
                    obj = dill.load(f)
                    self.cache[obj[FILENAME]] = obj
        print('Cache took {0:.2f} seconds to load. {1:} keys.'.format(time() - st, len(self.cache)))
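The except clause above is the standard way to catch librosa's input-validation failures: functions such as librosa.util.valid_audio raise ParameterError for NaN values, wrong dtypes, and other malformed buffers. A minimal reproduction:

import numpy as np
import librosa
from librosa.util.exceptions import ParameterError

bad = np.array([0.0, np.nan, 0.5], dtype=np.float32)
try:
    librosa.util.valid_audio(bad)
except ParameterError as err:
    print('rejected:', err)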
github librosa / librosa / librosa / onset.py
    >>> plt.subplot(2,1,2)
    >>> plt.plot(rms[0], label='RMS')
    >>> plt.vlines(onset_bt_rms, 0, rms.max(), label='Backtracked (RMS)', color='r')
    >>> plt.legend(frameon=True, framealpha=0.75)
    >>> plt.show()
    '''

    # Find points where energy is non-increasing
    # all points:  energy[i] <= energy[i-1]
    # tail points: energy[i] < energy[i+1]
    minima = np.flatnonzero((energy[1:-1] <= energy[:-2]) &
                            (energy[1:-1] < energy[2:]))

    # Pad on a 0, just in case we have onsets with no preceding minimum
    # Shift by one to account for slicing in minima detection
    minima = util.fix_frames(1 + minima, x_min=0)

    # Only match going left from the detected events
    return minima[util.match_events(events, minima, right=False)]
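Both utilities here are small index helpers: fix_frames clips a frame array to a valid range (prepending x_min when padding is enabled), and match_events finds, for each event, the nearest entry in another array, with right=False restricting matches to entries at or before the event. In isolation:

import numpy as np
import librosa

minima = librosa.util.fix_frames(np.array([3, 10, 25]), x_min=0)  # -> [0, 3, 10, 25]
events = np.array([5, 12, 30])

# nearest minimum at or before each event
idx = librosa.util.match_events(events, minima, right=False)
print(minima[idx])  # [ 3 10 25]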
github albincorreya / ChromaCoverId / easyAudioFeatures.py
def getBeatSyncChroma(audio_vector, fs, chromagram, display=True):
	"""
	Computes the beat-sync chromagram
	"""
	y_harmonic, y_percussive = librosa.effects.hpss(audio_vector)
	tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=fs)
	print("Tempo ->", tempo)
	beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate=np.median)
	if display:
		librosa.display.specshow(beat_chroma, x_axis='time', y_axis='chroma', cmap='gray_r', hop_length=4098)
	return beat_chroma
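librosa.util.sync is what makes the chromagram beat-synchronous: it aggregates the frames between consecutive boundary indices into a single column each. A small self-contained example:

import numpy as np
import librosa

chroma = np.random.rand(12, 100)        # 12 pitch classes x 100 frames
beats = np.array([0, 25, 50, 75, 100])  # frame boundaries

beat_chroma = librosa.util.sync(chroma, beats, aggregate=np.median)
print(beat_chroma.shape)  # (12, 4)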
github ynop / audiomate / audiomate / utils / audio.py
    buffer = []
    n_buffer = 0
    n_samples = 0

    with audioread.audio_open(file_path) as input_file:
        n_channels = input_file.channels
        sr_native = input_file.samplerate

        start_sample = int(np.round(sr_native * start)) * n_channels
        end_sample = end

        if end_sample != np.inf:
            end_sample = int(np.round(sr_native * end)) * n_channels

        for block in input_file:
            block = librosa.util.buf_to_float(block)
            n_prev = n_samples
            n_samples += len(block)

            if n_samples < start_sample:
                continue

            if n_prev > end_sample:
                break

            if n_samples > end_sample:
                block = block[:end_sample - n_prev]

            if n_prev <= start_sample <= n_samples:
                block = block[start_sample - n_prev:]

            n_buffer += len(block)
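buf_to_float handles the sample-format conversion in this loop: audioread yields raw PCM buffers (16-bit, matching the default n_bytes=2), and librosa.util.buf_to_float reinterprets and rescales them to float32 in [-1, 1]. In isolation:

import numpy as np
import librosa

pcm = np.array([0, 16384, -32768], dtype='<i2').tobytes()  # little-endian int16
y = librosa.util.buf_to_float(pcm, n_bytes=2)
print(y)  # [ 0.   0.5 -1. ]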
github librosa / librosa / librosa / core / audio.py
    Examples
    --------
    >>> y, sr = librosa.load(librosa.util.example_audio_file(), mono=False)
    >>> y.shape
    (2, 1355168)
    >>> y_mono = librosa.to_mono(y)
    >>> y_mono.shape
    (1355168,)

    '''
    # Ensure Fortran contiguity.
    y = np.asfortranarray(y)

    # Validate the buffer.  Stereo is ok here.
    util.valid_audio(y, mono=False)

    if y.ndim > 1:
        y = np.mean(y, axis=0)

    return y
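For comparison, the public entry point wraps exactly this validate-then-average pattern:

import numpy as np
import librosa

stereo = np.random.randn(2, 1000).astype(np.float32)
mono = librosa.to_mono(stereo)
print(mono.shape)  # (1000,)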
github librosa / librosa / librosa / core.py
        >>> y, sr = librosa.load(librosa.util.example_audio_file(), mono=False)
        >>> y.shape
        (2, 1354752)
        >>> y_mono = librosa.to_mono(y)
        >>> y_mono.shape
        (1354752,)

    :parameters:
        - y : np.ndarray [shape=(2,n) or shape=(n,)]

    :returns:
        - y_mono : np.ndarray [shape=(n,)]
    '''

    # Validate the buffer.  Stereo is ok here.
    util.valid_audio(y, mono=False)

    if y.ndim > 1:
        y = np.mean(y, axis=0)

    return y