How to use the librosa.feature.delta function in librosa

To help you get started, we’ve selected a few librosa.feature.delta examples based on popular ways it is used in public projects.

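Before the project excerpts, here is a minimal, self-contained sketch of the call itself. The file path and parameter values are placeholders rather than something taken from the projects below; the values noted in the comments (width=9, order=1, mode='interp') are librosa's documented defaults.

import librosa
import numpy as np

# Load any mono audio file (placeholder path).
y, sr = librosa.load("example.wav")

# Static MFCCs: shape (n_mfcc, n_frames).
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

# First- and second-order deltas; each has the same shape as mfcc.
mfcc_delta = librosa.feature.delta(mfcc)             # order=1, width=9, mode='interp'
mfcc_delta2 = librosa.feature.delta(mfcc, order=2)   # second derivative

# The pattern used in most of the excerpts below: stack along the feature axis.
features = np.concatenate((mfcc, mfcc_delta, mfcc_delta2), axis=0)
print(features.shape)  # (39, n_frames)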

github hrbigelow / ae-wavenet / mfcc.py
n_mels=self.n_mels, n_mfcc=self.n_mfcc)

        def mfcc_pred_output_size(in_sz, window_sz, hop_sz):
            '''Reverse-engineered output size calculation derived by observing the
            behavior of librosa.feature.mfcc'''
            n_extra = 1 if window_sz % 2 == 0 else 0
            n_pos = in_sz + n_extra
            return n_pos // hop_sz + (1 if n_pos % hop_sz > 0 else 0)

        assert mfcc.shape[1] == mfcc_pred_output_size(wav_pad.shape[0],
            self.window_sz, self.hop_sz)

        mfcc_trim = mfcc[:,trim_left:-trim_right or None]

        mfcc_delta = librosa.feature.delta(mfcc_trim)
        mfcc_delta2 = librosa.feature.delta(mfcc_trim, order=2)
        mfcc_and_derivatives = np.concatenate((mfcc_trim, mfcc_delta, mfcc_delta2), axis=0)

        return mfcc_and_derivatives
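librosa.feature.delta returns an array with the same shape as its input, so stacking the static coefficients with their first- and second-order deltas along axis 0 triples the row count while leaving the frame count unchanged. A quick sanity check, reusing the excerpt's variable names purely for illustration:

assert mfcc_delta.shape == mfcc_trim.shape
assert mfcc_delta2.shape == mfcc_trim.shape
assert mfcc_and_derivatives.shape == (3 * mfcc_trim.shape[0], mfcc_trim.shape[1])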
github srviest / SoloLa / guitar_trans / models.py
    @staticmethod
    def extract_features(y, mc, fn, ans=None):
        nmc, dmc = Feature.melody_features(mc)
        if np.any(np.isnan([nmc, dmc])):
            print('nan in {}.'.format(fn))
            print(mc)
            return None
        n_mfcc = 13
        mfcc = rosa.feature.mfcc(y, sr=SAMPLING_RATE, n_mfcc=n_mfcc, n_fft=512, hop_length=HOP_LENGTH)
        mfcc_d = rosa.feature.delta(mfcc)
        mfcc_d2 = rosa.feature.delta(mfcc, order=2)
        # feat_all = np.concatenate((mfcc, mfcc_d, mfcc_d2), axis=0).astype('float32')
        feat_all = np.concatenate((mfcc, mfcc_d, mfcc_d2, np.array([nmc]), np.array([dmc])), axis=0).astype('float32')
        return (feat_all, fn) if ans is None else (feat_all, ans, fn)
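Two idioms for second-order features appear across these excerpts: here order=2 asks librosa.feature.delta for the second derivative of the static MFCCs in a single call, whereas a later excerpt chains two first-order calls instead. Because the default mode='interp' fits a Savitzky-Golay filter, the two approaches give similar but not necessarily identical results; a hedged comparison sketch (rosa is this project's alias for librosa):

d2_direct = rosa.feature.delta(mfcc, order=2)              # second derivative in one call
d2_chained = rosa.feature.delta(rosa.feature.delta(mfcc))  # first-order delta applied twice
# Typically close, but not guaranteed to match element for element.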
github foamliu / Speech-Transformer / utils.py
    st = int(sr * 0.001 * stride)
    if feature == 'fbank':  # log-scaled
        feat = librosa.feature.melspectrogram(y=yt, sr=sr, n_mels=dim,
                                              n_fft=ws, hop_length=st)
        feat = np.log(feat + 1e-6)
    elif feature == 'mfcc':
        feat = librosa.feature.mfcc(y=yt, sr=sr, n_mfcc=dim, n_mels=26,
                                    n_fft=ws, hop_length=st)
        feat[0] = librosa.feature.rmse(yt, hop_length=st, frame_length=ws)

    else:
        raise ValueError('Unsupported Acoustic Feature: ' + feature)

    feat = [feat]
    if delta:
        feat.append(librosa.feature.delta(feat[0]))

    if delta_delta:
        feat.append(librosa.feature.delta(feat[0], order=2))
    feat = np.concatenate(feat, axis=0)
    if cmvn:
        feat = (feat - feat.mean(axis=1)[:, np.newaxis]) / (feat.std(axis=1) + 1e-16)[:, np.newaxis]
    if save_feature is not None:
        tmp = np.swapaxes(feat, 0, 1).astype('float32')
        np.save(save_feature, tmp)
        return len(tmp)
    else:
        return np.swapaxes(feat, 0, 1).astype('float32')
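A portability note on this excerpt (and on the later ones that call the same function): librosa.feature.rmse was deprecated in librosa 0.7 and removed in 0.8, where it lives on as librosa.feature.rms. On a current release the energy line would look roughly like this sketch, which is not the project's code:

energy = librosa.feature.rms(y=yt, frame_length=ws, hop_length=st)  # shape (1, n_frames)
feat[0] = energy[0]  # replace the 0th MFCC coefficient with frame energy, as the excerpt does

The delta handling itself is unaffected: both librosa.feature.delta calls operate on feat[0], the static feature matrix, rather than chaining the first-order result.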
github hirofumi0810 / neural_sp / utils / feature_extraction / wav2feature_librosa.py
        if feature_type == 'logfbank':
            # feat = librosa.core.logamplitude(feat)
            feat = librosa.core.spectrum.power_to_db(feat)
        if use_energy:
            rmse = librosa.feature.rmse(y=y,
                                        frame_length=2048,
                                        hop_length=512)
            # NOTE: `[1, T]`
            feat = np.concatenate((feat, rmse), axis=0)

    # Convert to time-major
    feat = feat.transpose((1, 0))

    if use_delta2:
        delta1_feat = librosa.feature.delta(feat, width=9)
        delta2_feat = librosa.feature.delta(delta1_feat, width=9)
        feat = np.concatenate((feat, delta1_feat, delta2_feat), axis=1)
    elif delta1:
        delta1_feat = librosa.feature.delta(feat, width=9)
        feat = np.concatenate((feat, delta1_feat), axis=1)

    return feat
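One thing to double-check when adapting this excerpt: librosa.feature.delta differentiates along the last axis by default (axis=-1). Here feat has already been transposed to time-major (frames, features), so the default axis points at the feature dimension rather than at time. If deltas over time are what you want on a time-major array, the axis can be given explicitly; a sketch under that assumption, not the project's code:

delta1_feat = librosa.feature.delta(feat, width=9, axis=0)
delta2_feat = librosa.feature.delta(delta1_feat, width=9, axis=0)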
github jordipons / elmarc / src / old_concept_GTZAN.py
Extract tensor-flow features: extract audio, compute librosa features and
    pass them through the tensor-flow model to extract the *features_list*

    :param audio: String pointing where the audio is located
    :param sampling_rate: Sampling rate used when loading the audio (change it for down-sampling)

    :return features: Extracted features per *audio* song
    """
    if feature_type == 'MFCC':
        audio, sr = librosa.load(audio, sr=sampling_rate)

        mfcc = librosa.feature.mfcc(y=audio, sr=sampling_rate, n_mfcc=20)
        mfcc_mean = np.mean(mfcc, axis=1)
        mfcc_std = np.std(mfcc, axis=1)

        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_delta_mean = np.mean(mfcc_delta, axis=1)
        mfcc_delta_std = np.std(mfcc_delta, axis=1)

        mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
        mfcc_delta2_mean = np.mean(mfcc_delta2, axis=1)
        mfcc_delta2_std = np.std(mfcc_delta2, axis=1)

        return np.concatenate((mfcc_mean, mfcc_std,
                               mfcc_delta_mean, mfcc_delta_std,
                               mfcc_delta2_mean, mfcc_delta2_std), axis=0)

    elif feature_type == 'CNN':
        # compute spectrogram
        audio, sr = librosa.load(audio, sr=sampling_rate)
        audio_rep = librosa.feature.melspectrogram(y=audio,
                                                   sr=sampling_rate,
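For track-level classification the MFCC branch reduces each matrix to per-coefficient means and standard deviations, so the returned vector has a fixed length of 6 * n_mfcc (120 values with n_mfcc=20) regardless of how long the audio is. An illustrative check, not part of the project:

features = np.concatenate((mfcc_mean, mfcc_std,
                           mfcc_delta_mean, mfcc_delta_std,
                           mfcc_delta2_mean, mfcc_delta2_std), axis=0)
assert features.shape == (6 * 20,)  # six statistics, each of length n_mfcc=20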
github h2oai / driverlessai-recipes / transformers / speech / audio_MFCC_transformer.py
            audio = np.pad(audio, (offset, samples - len(audio) - offset), padmode)
   
            #Get Mel spectrogram of audio
            spectrogram = librosa.feature.melspectrogram(audio,
                                                 sr=sampling_rate,
                                                 n_mels=n_mels,
                                                 hop_length=hop_length,
                                                 n_fft=n_fft,
                                                 fmin=fmin,
                                                 fmax=fmax)
            #Convert to log scale (DB)
            spectrogram = librosa.power_to_db(spectrogram)
            
            #Get MFCC and second derivatives
            mfcc = librosa.feature.mfcc(S=spectrogram, n_mfcc=n_mfcc)
            delta2_mfcc = librosa.feature.delta(mfcc, order=2)
            
            #Append MFCC to spectrogram and flatten
            features = np.concatenate((spectrogram,mfcc,delta2_mfcc),axis=0)
            X = features.ravel()
            
            return X
        except:
            spectrogram = np.zeros(((n_mels+2*n_mfcc)*47), dtype=np.float32)
            X = spectrogram.ravel()
            return X
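Here the MFCCs are computed from the pre-computed log-mel spectrogram via the S= keyword, and only the second-order delta is appended, so the stacked matrix has n_mels + 2 * n_mfcc rows before it is flattened. The bare except branch mirrors that: it returns an all-zero vector of the same size under an assumed fixed frame count of 47. A shape sketch using the excerpt's names, for illustration only:

assert features.shape[0] == n_mels + 2 * n_mfcc            # spectrogram + mfcc + delta2_mfcc rows
assert X.shape[0] == features.shape[0] * features.shape[1]  # ravel() flattens row-major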
github amogh3892 / Environmental-sound-recognition-using-combination-of-spectrogram-and-acoustic-features / environmentalSoundClassification / audioProcessingUtil.py
        :param y: audio data
        :param sr: Sampling rate 
        :param n_fft: FFT length    
        :param hop_length: Hop length 
        :param n_mfcc: Number of MFCC coefficients. 
        :return: Audio feature matrix 
        """

        features = None

        #MFCCS
        mfccs =  librosa.feature.mfcc(y=y, sr=sr, n_mfcc = n_mfcc , n_fft = n_fft, hop_length = hop_length)
        features = mfccs

        #Delta mfccs
        delta_mfccs =  librosa.feature.delta(mfccs)
        features = np.concatenate((features,delta_mfccs))


        #rmse
        rmse =  librosa.feature.rmse(y=y , n_fft = n_fft , hop_length = hop_length)
        features = np.concatenate((features,rmse))


        #spectral centroid
        spectral_centroid =  librosa.feature.spectral_centroid(y=y, sr=sr, n_fft = n_fft, hop_length = hop_length )
        features = np.concatenate((features,spectral_centroid))


        #spectral bandwidth
        spectral_bandwidth =  librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft = n_fft, hop_length = hop_length)
        features = np.concatenate((features,spectral_bandwidth))
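All of the frame-level features above share the same n_fft and hop_length, so they have matching frame counts and can be stacked row-wise; the delta block contributes n_mfcc rows, while rmse, spectral_centroid and spectral_bandwidth each add a single row (as noted earlier, rmse is librosa.feature.rms on current librosa releases). An illustrative row-count check:

assert features.shape[0] == 2 * n_mfcc + 3  # mfccs + deltas + rmse + centroid + bandwidth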
github johnmartinsson / bird-species-classification / bird / loader.py
def load_segments(segments, target_size, input_data_mode):
    print(segments, target_size, input_data_mode)
    data = []
    for segment in segments:
        (fs, signal) = utils.read_wave_file(segment)
        if input_data_mode == "mfcc":
            sample = librosa.feature.mfcc(signal, fs, n_mfcc=target_size[0])
            sample = scipy.misc.imresize(sample, target_size)
            sample = sample.reshape((sample.shape[0],
                                     sample.shape[1], 1))
        if input_data_mode == "mfcc_delta":
            mfcc = librosa.feature.mfcc(signal, fs, n_mfcc=target_size[0])
            mfcc_delta_3 = librosa.feature.delta(mfcc, width=3, order=1)
            mfcc_delta_11 = librosa.feature.delta(mfcc, width=11, order=1)
            mfcc_delta_19 = librosa.feature.delta(mfcc, width=19, order=1)
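            # Note on width (not part of the original project): librosa.feature.delta
            # requires width to be an odd integer >= 3, and with the default
            # mode='interp' it must not exceed the number of frames in mfcc, so the
            # wider filters (11, 19) would fail on very short segments.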

            mfcc = scipy.misc.imresize(mfcc, target_size)
            mfcc_delta_3 = scipy.misc.imresize(mfcc_delta_3, target_size)
            mfcc_delta_11 = scipy.misc.imresize(mfcc_delta_11, target_size)
            mfcc_delta_19 = scipy.misc.imresize(mfcc_delta_19, target_size)

            mfcc = mfcc.reshape(mfcc.shape[0], mfcc.shape[1], 1)
            mfcc_delta_3 = mfcc_delta_3.reshape(mfcc_delta_3.shape[0], mfcc_delta_3.shape[1], 1)
            mfcc_delta_11 = mfcc_delta_11.reshape(mfcc_delta_11.shape[0], mfcc_delta_11.shape[1], 1)
            mfcc_delta_19 = mfcc_delta_19.reshape(mfcc_delta_19.shape[0], mfcc_delta_19.shape[1], 1)
            sample = np.concatenate([mfcc, mfcc_delta_3, mfcc_delta_11, mfcc_delta_19], axis=2)

        if input_data_mode == "spectrogram":
            sample = sp.wave_to_sample_spectrogram(signal, fs)