How to use the librosa.power_to_db function

To help you get started, we’ve selected a few librosa.power_to_db examples, based on popular ways it is used in public projects.

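librosa.power_to_db converts a power spectrogram (squared magnitude) to decibel units. Before the project examples, here is a minimal sketch of the usual pattern; it assumes librosa >= 0.8 for the bundled librosa.ex audio, but any mono signal works:

import numpy as np
import librosa

y, sr = librosa.load(librosa.ex('trumpet'))   # example clip, fetched on first use
S = np.abs(librosa.stft(y)) ** 2              # power spectrogram (squared magnitude)
S_db = librosa.power_to_db(S, ref=np.max)     # decibels, with the peak mapped to 0 dB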

github ynop / audiomate / audiomate/processing/pipeline/rhythm.py (View on Github)
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
        # Clear any leftover frames if this is the first chunk
        if chunk.offset == 0:
            self.rest = None

        # Compute mel-spectrogram
        power_spec = np.abs(spectral.stft_from_frames(chunk.data.T)) ** 2
        mel = np.abs(librosa.feature.melspectrogram(S=power_spec, n_mels=self.n_mels, sr=sampling_rate))
        mel_power = librosa.power_to_db(mel)

        # Compute onset strengths
        oenv = librosa.onset.onset_strength(S=mel_power, center=False)

        # Remove the left context; otherwise frames are duplicated during online processing
        oenv = oenv[chunk.left_context:]

        if self.rest is not None:
            all_frames = np.concatenate([self.rest, oenv])
        else:
            # It's the first chunk --> pad to center tempogram windows at the beginning
            all_frames = np.pad(oenv, (self.win_length // 2, 0), mode='linear_ramp', end_values=0)

        if chunk.is_last:
            # It's the last chunk --> pad to center tempogram windows at the end
            all_frames = np.pad(all_frames, (0, self.win_length // 2), mode='linear_ramp', end_values=0)
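
In this snippet power_to_db runs with its defaults (ref=1.0, amin=1e-10, top_db=80.0), so the output is essentially 10 * log10 of the mel power, clipped to 80 dB below the peak. A quick sketch of that equivalence, using a stand-in array:

import numpy as np
import librosa

S = np.random.rand(64, 100)                    # stand-in power values
S_db = librosa.power_to_db(S)                  # defaults: ref=1.0, amin=1e-10, top_db=80.0
manual = 10 * np.log10(np.maximum(S, 1e-10))   # the same formula, before clipping
print(np.allclose(S_db, np.maximum(manual, manual.max() - 80.0)))   # True
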
github ankitshah009 / WALNet-Weak_Label_Analysis / feature_extraction/compute_melspectrograms_128.py (View on Github)
if not os.path.isfile(os.path.expanduser(infl_new.strip())):
                print(os.path.expanduser(infl_new.strip()))
                print(infl + ' Not Found')
        else:
            # Fetch the audio samples at the native sampling rate.
            y, sr = librosa.load(infl_new.strip(), sr=None)
            if len(y.shape) > 1:
                print('Mono Conversion')
                y = librosa.to_mono(y)
            if sr != sampling_freq:
                print('Resampling {}'.format(sr))
                # Note: librosa >= 0.10 requires keyword arguments here:
                # librosa.resample(y, orig_sr=sr, target_sr=sampling_freq)
                y = librosa.resample(y, sr, sampling_freq)
            # mel-spectrogram
            spec = librosa.feature.melspectrogram(y, sr=sampling_freq, n_fft=window_length,
                                                  hop_length=hop_length, n_mels=num_mels)
            # Log scaling
            spec = librosa.power_to_db(spec, ref=1.0)
            infl_list = infl_new.strip().split("/")
            file_name = infl_list[-1].strip()
            out_dir = output_root + "/" + infl_list[-2]
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            specfile = str(out_dir) + '/' + str(file_name) + '.orig.spec.npy'
            print(specfile)
            np.save(specfile, spec, allow_pickle=False)
            if infl_list[-2].strip() == "":
                output_mel_file = str(file_name) + '.orig.spec.npy'
            else:
                output_mel_file = infl_list[-2].strip() + '/' + str(file_name) + '.orig.spec.npy'
            print(output_mel_file)
            out_file_list.append(output_mel_file.strip())
    create_file(output_file, out_file_list)
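
This example pins ref=1.0, so the dB values are absolute. The alternative used in several snippets below, ref=np.max, maps the loudest bin to 0 dB and makes everything else negative. A small sketch of the difference, again with stand-in data:

import numpy as np
import librosa

S = np.abs(np.random.randn(128, 200)) ** 2     # stand-in power spectrogram
abs_db = librosa.power_to_db(S, ref=1.0)       # absolute scale: 0 dB means power == 1.0
rel_db = librosa.power_to_db(S, ref=np.max)    # relative scale: 0 dB at the peak bin
print(abs_db.max(), rel_db.max())              # rel_db.max() is 0.0
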
github ganesh-srinivas / laughter / scripts/predict_convnet_laughterornot_10sec_model.py (View on Github)
def extract_features_from_waveforms(waveforms):
    """
    Extract log-scaled mel-spectrograms and their corresponding 
    deltas from the audio waveform (not the filename)
    """
    log_specgrams = []
    #labels=[]
    for s in waveforms:
      sound_clip = shape_sound_clip(s)

      melspec = librosa.feature.melspectrogram(sound_clip, n_mels = 120, n_fft=1024)
      #print melspec.shape

      logspec = librosa.power_to_db(melspec, ref = np.max)
      #print logspec.shape
      logspec = logspec.T.flatten()[:, np.newaxis].T
      #print logspec.shape

      #print "Produce of two elements in melspec: ", melspec.shape[0]*melspec.shape[1]  
      log_specgrams.append(logspec)
      del sound_clip
      del melspec
      del logspec
      #labels.append(labeltext2labelid(f.split('/')[-2]))  

    # `bands` and `frames` are module-level constants defined elsewhere in the script.
    log_specgrams = np.asarray(log_specgrams).reshape(len(log_specgrams), bands, frames, 1)

    features = np.concatenate((log_specgrams, np.zeros(np.shape(log_specgrams))), axis=3)

    for i in range(len(features)):
        # Fill the second channel with the delta of the log-mel channel.
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])

    return np.array(features)
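
librosa.feature.delta, which fills the second channel above, estimates local derivatives along the last axis. A tiny self-contained sketch with a stand-in array:

import numpy as np
import librosa

logmel = np.random.randn(120, 41)             # stand-in log-mel spectrogram
d1 = librosa.feature.delta(logmel)            # first-order deltas, same shape as input
d2 = librosa.feature.delta(logmel, order=2)   # second-order (delta-delta)
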
github interactiveaudiolab / voogle / archive/run_model_pytorch.py (View on Github)
def preprocessing_imi(imi_path):

    y, sr = librosa.load(imi_path, sr=16000)

    # Zero-pad short clips to 4 seconds; truncate longer ones
    if y.shape[0] < 4*sr:
        pad = np.zeros((4*sr-y.shape[0]))
        y_fix = np.append(y, pad)
    else:
        y_fix = y[0:int(4*sr)]

    S = librosa.feature.melspectrogram(y=y_fix, sr=sr, n_fft=133, 
                                       hop_length=133, power=2, n_mels=39, 
                                       fmin=0.0, fmax=5000)
    S = S[:, :482]
    S_db = librosa.power_to_db(S, ref=np.max)

    imi_spectrogram = [S_db]
    
    imi_spectrogram = np.array(imi_spectrogram).astype('float32')

    imi_spectrogram_norm = normalize_spectrogram(imi_spectrogram)

    return imi_spectrogram_norm
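
Because ref=np.max pins the peak at 0 dB, every query yields a spectrogram with the same upper bound before normalize_spectrogram (defined elsewhere in the module) rescales it. A hypothetical call, assuming normalize_spectrogram preserves the array shape; the file name is illustrative:

imi_spec = preprocessing_imi('query_imitation.wav')   # hypothetical path
print(imi_spec.shape)                                 # (1, 39, 482): one clip, 39 mel bands, 482 frames
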
github mdangschat / speech-corpus-dl / tools/audio_sample_info.py (View on Github)
plt.title('Harmonic and Percussive')

    # Add file information.
    plt.subplot(3, 1, 3)
    plt.axis('off')
    plt.text(0.0, 1.0, info_str_tex, color='black', verticalalignment='top')
    plt.tight_layout()

    # Calculate the mel spectrogram and MFCCs.
    # Despite the name, db_pow holds the power spectrogram; it is converted to dB below.
    db_pow = np.abs(
        librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)) ** 2

    s_mel = librosa.feature.melspectrogram(S=db_pow, sr=sr, hop_length=hop_length,
                                           fmax=f_max, fmin=f_min, n_mels=n_mels)

    s_mel = librosa.power_to_db(s_mel, ref=np.max)
    s_mfcc = librosa.feature.mfcc(S=s_mel, sr=sr, n_mfcc=n_mfcc)

    # STFT (Short-time Fourier Transform)
    # https://librosa.github.io/librosa/generated/librosa.core.stft.html
    plt.figure(figsize=(12, 10))
    db = librosa.amplitude_to_db(librosa.magphase(librosa.stft(y))[0], ref=np.max)
    plt.subplot(3, 2, 1)
    display.specshow(db, sr=sr, x_axis='time', y_axis='linear', hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Linear-frequency power spectrogram')

    plt.subplot(3, 2, 2)
    display.specshow(db, sr=sr, x_axis='time', y_axis='log', hop_length=hop_length)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log-frequency power spectrogram')
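
This snippet uses both converters: power_to_db on the mel power spectrogram and amplitude_to_db on the STFT magnitude. The two agree when the magnitude is squared first; a quick check of that relationship (librosa.ex needs librosa >= 0.8):

import numpy as np
import librosa

y, _ = librosa.load(librosa.ex('trumpet'))
mag = np.abs(librosa.stft(y))
print(np.allclose(librosa.amplitude_to_db(mag, ref=np.max),
                  librosa.power_to_db(mag ** 2, ref=np.max)))   # True
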
github interactiveaudiolab / voogle / archive/run_model_pytorch_segment.py (View on Github)
i = 0
            segments = []
            while ((i + (4*sr)) <= y_copy.shape[0]):
                segments.append(y_copy[i:(i+4*sr)])
                i = i + (2*sr)


            ref_spectrograms_segments = []

            for seg in segments:

                S = librosa.feature.melspectrogram(y=seg, sr=sr, n_fft=1024, hop_length=1024, power=2)

                S = S[:, 0:128]
                S_db = librosa.power_to_db(S, ref=np.max)
                ref_spec = [S_db]
                ref_spec = np.array(ref_spec).astype('float32')
                # print ref_spec.shape
                ref_spec = normalize_spectrogram(ref_spec)

                ref_spectrograms_segments.append(ref_spec)

            ref_file_names.append(f)

            ref_spectrograms.append(ref_spectrograms_segments)
            ref_spectrograms_segments = []

    # print ref_file_names
    return np.array(ref_file_names), ref_spectrograms
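
The while-loop above slices overlapping 4-second windows with a 2-second hop before each segment is converted with power_to_db. The same framing can be written with librosa.util.frame; a sketch with a stand-in signal:

import numpy as np
import librosa

sr = 16000
y_copy = np.random.randn(10 * sr)   # stand-in for the loaded reference audio
frames = librosa.util.frame(y_copy, frame_length=4 * sr, hop_length=2 * sr)
segments = list(frames.T)           # four 4-second segments, hopped by 2 seconds
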
github tqbl / dcase2018_task2 / task2/features.py (View on Github)
Args:
            x (np.ndarray): Input time-series signal.
            sample_rate (number): Sampling rate of signal.

        Returns:
            np.ndarray: The logmel feature vector.
        """
        # Resample to the target sampling rate
        # (librosa >= 0.10 requires keywords: orig_sr=sample_rate, target_sr=self.sample_rate)
        x = librosa.resample(x, sample_rate, self.sample_rate)

        # Compute short-time Fourier transform
        D = librosa.stft(x, n_fft=self.n_window, hop_length=self.hop_length)
        # Transform to Mel frequency scale
        S = np.dot(self.mel_fb, np.abs(D) ** 2).T
        # Apply log nonlinearity; top_db=None keeps the full dynamic range
        return librosa.power_to_db(S, ref=np.max, top_db=None)
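
Passing top_db=None disables the default clipping at 80 dB below the peak, so very quiet bins keep their true level (down to the amin floor). A small sketch of the effect:

import numpy as np
import librosa

S = np.array([[1.0, 1e-12]])                             # one loud and one very quiet bin
print(librosa.power_to_db(S, ref=np.max))                # quiet bin clipped to -80 dB
print(librosa.power_to_db(S, ref=np.max, top_db=None))   # quiet bin at -100 dB (amin=1e-10 floor)
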
github csteinmetz1 / NeuralReverberator / util.py (View on Github)
_min = np.amin(log_power_spectra)
                _max = np.amax(log_power_spectra)
                normalized_log_power_spectra = (log_power_spectra - _min) / (_max - _min)
                filename = f"ir_{sample_names[idx]}_{specs_generated+1}"
                np.savetxt(os.path.join(output_dir, filename + ".txt"), normalized_log_power_spectra)
                specs_generated += 1

                if save_plots:
                    if not os.path.isdir("spect_plots"):
                        os.makedirs("spect_plots")
                    plot_specgrams(log_power_spectra, normalized_log_power_spectra, 
                                      16000, filename + ".png", "spect_plots")
        
        S = librosa.stft(audio, n_fft=n_fft, hop_length=n_hop, center=True)
        power_spectra = np.abs(S)**2
        log_power_spectra = librosa.power_to_db(power_spectra)
        _min = np.amin(log_power_spectra)
        _max = np.amax(log_power_spectra)
        if _min == _max:
            # Guard against dividing by zero on a constant (e.g. silent) spectrogram.
            print(f"divide by zero in {sample_names[idx]}")
        else:
            normalized_log_power_spectra = (log_power_spectra - _min) / (_max - _min)
            filename = f"ir_{sample_names[idx]}_{specs_generated+1}"
            np.savetxt(os.path.join(output_dir, filename + ".txt"), normalized_log_power_spectra)
            specs_generated += 1

            if save_plots:
                if not os.path.isdir("spect_plots"):
                    os.makedirs("spect_plots")
                plot_specgrams(log_power_spectra, normalized_log_power_spectra,
                               16000, filename + ".png", "spect_plots")

        sys.stdout.write(f"* Computed {specs_generated}/{n_specs} RIR spectrograms\r")
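
The min/max rescaling that follows power_to_db here can be factored into a guarded helper, mirroring the divide-by-zero check above; a minimal sketch:

import numpy as np

def minmax_normalize(log_power_spectra):
    """Rescale a log-power spectrogram to [0, 1], guarding against constant input."""
    lo, hi = np.amin(log_power_spectra), np.amax(log_power_spectra)
    if lo == hi:
        # A constant (e.g. silent) spectrogram would otherwise divide by zero.
        return np.zeros_like(log_power_spectra)
    return (log_power_spectra - lo) / (hi - lo)
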
github csteinmetz1 / NeuralReverberator / util.py (View on Github)
specs_generated = 0
    n_specs = len(IRs)

    for idx in range(len(IRs)):
        audio = np.reshape(IRs[idx], (IRs[idx].shape[0],))

        if augment_data:
            
            stretch_factors = [0.80, 0.90, 1.10, 1.20]
            shift_factors = [-2, -1, 1, 2]
            augmented_audio = augment_audio(audio, 16000,
                                            stretch_factors=stretch_factors,
                                            shift_factors=shift_factors)
            n_specs = len(IRs) * (len(stretch_factors) + len(shift_factors))
            for augment in augmented_audio:
                S = librosa.stft(augment, n_fft=n_fft, hop_length=n_hop, center=True)
                power_spectra = np.abs(S)**2
                log_power_spectra = librosa.power_to_db(power_spectra)
                _min = np.amin(log_power_spectra)
                _max = np.amax(log_power_spectra)
                normalized_log_power_spectra = (log_power_spectra - _min) / (_max - _min)
                filename = f"ir_{sample_names[idx]}_{specs_generated+1}"
                np.savetxt(os.path.join(output_dir, filename + ".txt"), normalized_log_power_spectra)
                specs_generated += 1

                if save_plots:
                    if not os.path.isdir("spect_plots"):
                        os.makedirs("spect_plots")
                    plot_specgrams(log_power_spectra, normalized_log_power_spectra, 
                                      16000, filename + ".png", "spect_plots")
        
        S = librosa.stft(audio, n_fft=n_fft, hop_length=n_hop, center=True)
        power_spectra = np.abs(S)**2
        log_power_spectra = librosa.power_to_db(power_spectra)