How to use the librosa.core.stft function in librosa

To help you get started, we’ve selected a few librosa examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github emlearn / emlearn / test / test_audio.py View on Github external
def test_melfilter_librosa():
    """Compare eml_audio.melfilter with librosa's mel spectrogram on one STFT frame.

    A short excerpt of the librosa example recording is loaded, its power
    spectrogram is computed, and the first frame is passed through both
    implementations.  The two results are plotted to
    'melfilter.librosa.png' for visual inspection and must agree in shape
    and within a relative tolerance of 1%.
    """
    filename = librosa.util.example_audio_file()
    y, sr = librosa.load(filename, offset=1.0, duration=0.3)
    n_fft = 1024
    hop_length = 256
    fmin = 500
    fmax = 5000
    n_mels = 16

    # Power spectrogram; only the first frame (column 0) is compared.
    spec = numpy.abs(librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length))**2
    spec1 = spec[:,0]

    ref = librosa.feature.melspectrogram(S=spec1, sr=sr, norm=None, htk=True, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
    out = eml_audio.melfilter(spec1, sr, n_fft, n_mels, fmin, fmax)

    def specshow(d, ax):
        s = librosa.amplitude_to_db(d, ref=numpy.max)
        librosa.display.specshow(s, ax=ax, x_axis='time')

    fig, (ref_ax, out_ax) = plt.subplots(2)
    try:
        # Each result is a single frame, so reshape it to a one-column image.
        specshow(ref.reshape(-1, 1), ax=ref_ax)
        specshow(out.reshape(-1, 1), ax=out_ax)
        fig.savefig('melfilter.librosa.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so figures do not pile up over a whole test session.
        plt.close(fig)

    assert ref.shape == out.shape
    numpy.testing.assert_allclose(ref, out, rtol=0.01)
github dodo1210 / tcc_code / elementos / mfcc / mfcc.py View on Github external
# 2. Load the audio as a waveform `y`
#    Store the sampling rate as `sr`
# Read the list of track file names (one name per line).
with open('/home/douglas/Música/musicas/wav/tristes/tristes.txt','r') as arq:
    lines = arq.readlines()

# Mean MFCC value of every track, in the same order as `lines`.
lista = []

for l in lines:
    # Drop the line terminator.  rstrip also copes with a final line that
    # has no trailing newline, which a two-way split("\n") cannot unpack.
    music = l.rstrip("\n")
    # Load the track -- PLEASE CHECK THAT THIS PATH IS RIGHT ON YOUR MACHINE.
    y, sr = librosa.load('/home/douglas/Música/musicas/wav/tristes/'+music,sr=44100)
    S = np.abs(librosa.core.stft(y, n_fft=1024, hop_length=512, win_length=1024, window='hann'))
    # NOTE(review): S is a magnitude spectrogram (np.abs only, not squared)
    # yet it is fed to power_to_db, which is documented for power
    # spectrograms -- confirm whether amplitude_to_db was intended.
    a = librosa.feature.mfcc(S=librosa.power_to_db(S),n_mfcc=1)
    media = a.mean()
    print(music,media)
    lista.append(media)

with open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','r') as arq:
    musics = arq.readlines()

# Build every output row BEFORE reopening the file for writing: opening with
# 'w' truncates it, so an IndexError (more CSV rows than analysed tracks)
# must surface while the original contents are still intact.
rows = [m.rstrip("\n")+","+str(lista[count])+"\n" for count, m in enumerate(musics)]

# Rewrite the results file with the new column appended to each row; the
# context manager guarantees the data is flushed and the handle closed.
with open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','w') as arq:
    arq.writelines(rows)
github sonidosmutantes / apicultor / module / machine_learning / SoundSimilarity.py View on Github external
for s in list(os.walk(files_dir+'/inharmonicity', topdown=False))[-1][-1]:
                 if str(t[0]).split('.')[0] == s.split('inharmonicity.ogg')[0]:
                     shutil.copy(files_dir+'/inharmonicity/'+s, files_dir+'/inharmonicity/'+str(c)+'/'+s)     
                     print t 
 
        try:
            simil_audio = [MonoLoader(filename=files_dir+'/inharmonicity/'+str(c)+f)() for f in list(os.walk(files_dir+'/inharmonicity/'+str(c), topdown = False))[-1][-1]]
            audio0 = scratch_music(choice(simil_audio))
            audio1 = scratch_music(choice(simil_audio)) 
            del simil_audio                               
            audio_N = min([len(i) for i in [audio0, audio1]])  
            audio_samples = [i[:audio_N]/i.max() for i in [audio0, audio1]]                                         
            simil_x = np.array(audio_samples).sum(axis=0) 
            del audio_samples
            simil_x = 0.5*simil_x/simil_x.max()      
            h, p = librosa.decompose.hpss(librosa.core.stft(simil_x))
            del simil_x, p
            h = librosa.istft(h)                                                
            MonoWriter(filename=files_dir+'/inharmonicity/'+str(c)+'/remix/'+'similarity_mix_inharmonicity.ogg', format = 'ogg', sampleRate = 44100)(h)  
            del h
        except Exception, e:
            print e
            continue
github sonidosmutantes / apicultor / module / machine_learning / SoundSimilarity.py View on Github external
for s in list(os.walk(files_dir+'/valleys', topdown=False))[-1][-1]:
                 if str(t[0]).split('.')[0] == s.split('valleys.ogg')[0]:
                     shutil.copy(files_dir+'/valleys/'+s, files_dir+'/valleys/'+str(c)+'/'+s)     
                     print t 
 
        try:
            simil_audio = [MonoLoader(filename=files_dir+'/valleys/'+str(c)+f)() for f in list(os.walk(files_dir+'/valleys/'+str(c), topdown = False))[-1][-1]]
            audio0 = scratch_music(choice(simil_audio))
            audio1 = scratch_music(choice(simil_audio)) 
            del simil_audio                               
            audio_N = min([len(i) for i in [audio0, audio1]])  
            audio_samples = [i[:audio_N]/i.max() for i in [audio0, audio1]]                                                  
            simil_x = np.array(audio_samples).sum(axis=0) 
            del audio_samples
            simil_x = 0.5*simil_x/simil_x.max()      
            h, p = librosa.decompose.hpss(librosa.core.stft(simil_x))
            del simil_x, p
            h = librosa.istft(h)                                                
            MonoWriter(filename=files_dir+'/valleys/'+str(c)+'/remix/'+'similarity_mix_valleys.ogg', format = 'ogg', sampleRate = 44100)(h)  
            del h
        except Exception, e:
            print e
            continue
github 1eedaegon / KYLius-method / x_ksh / stft_processing.py View on Github external
def see_how_long(file):
    """Return the number of STFT frames of every audio file named in *file*.

    Args:
        file: iterable of file names; each one is appended to the
            module-level ``path`` prefix (defined outside this function)
            before loading.

    Returns:
        A list with one integer per input name, in input order: the number
        of columns (time frames) of that file's STFT computed with
        ``n_fft=1024`` and ``hop_length=512``.  An empty input yields ``[]``.
    """
    c = []
    for filename in file:
        y, _ = librosa.core.load(path+filename, mono=True, res_type="kaiser_fast")
        # n_fft / hop_length are passed by keyword: newer librosa releases
        # accept only the signal positionally, and keywords work on old
        # releases as well.
        stft = librosa.core.stft(y, n_fft=1024, hop_length=512)
        # The STFT is laid out as (frequency bins, frames); only the frame
        # count is needed, so taking the magnitude first is unnecessary.
        c.append(stft.shape[1])
    return c
github yjlolo / vae-audio / dataset / transformers.py View on Github external
def __call__(self, x):
        """Turn the waveform *x* into the spectrogram selected by ``self.spec_type``.

        * ``'lin'`` -- power spectrogram ``|STFT|**2`` (not converted to dB).
        * ``'mel'`` -- mel power spectrogram converted to dB relative to its
          maximum.
        * ``'cqt'`` -- not implemented; raises ``NotImplementedError``.

        Reads ``self.n_fft`` and ``self.hop_size`` (plus ``self.sr`` and
        ``self.n_band`` for the mel variant).  Any other ``spec_type`` fails
        the assertion below.
        """
        kind = self.spec_type
        assert kind in ['lin', 'mel', 'cqt'], "spec_type should be in ['lin', 'mel', 'cqt']"

        if kind == 'lin':
            linear = librosa.core.stft(y=x, n_fft=self.n_fft, hop_length=self.hop_size)
            # squared magnitude -> power spectrogram
            return np.abs(linear) ** 2

        if kind == 'mel':
            # melspectrogram already returns np.abs(S)**power with power=2 by
            # default, so the result can go straight into power_to_db.
            mel_power = librosa.feature.melspectrogram(
                y=x,
                sr=self.sr,
                n_fft=self.n_fft,
                hop_length=self.hop_size,
                n_mels=self.n_band,
            )
            return librosa.core.power_to_db(mel_power, ref=np.max)

        # TODO: implement CQT
        raise NotImplementedError
github jhetherly / EnglishSpeechUpsampler / plots / plot_comparative_spectrogram.py View on Github external
def read_audio_spectrum(x, **kwd_args):
    """Return ``librosa.core.stft`` of *x*.

    Every keyword argument is forwarded to ``librosa.core.stft`` unchanged,
    and its result is returned as-is.
    """
    spectrum = librosa.core.stft(x, **kwd_args)
    return spectrum
github colinsongf / keyword_spotting / process_wav.py View on Github external
def process_stft(f):
    """Load the audio file *f* and return its magnitude spectrogram and waveform.

    The file is loaded at ``config.samplerate``; when ``config.pre_emphasis``
    is truthy the waveform is passed through ``pre_emphasis`` first.

    Args:
        f: path (or other source accepted by ``librosa.load``) of the audio.

    Returns:
        A tuple ``(linearspec, y)``: ``linearspec`` is the transposed
        magnitude of the STFT computed with ``config.fft_size`` and
        ``config.hop_size``; ``y`` is the (possibly pre-emphasised) waveform.
    """
    y, _ = librosa.load(f, sr=config.samplerate)
    if config.pre_emphasis:
        y = pre_emphasis(y)

    # n_fft / hop_length are passed by keyword: newer librosa releases accept
    # only the signal positionally, and keywords work on old releases too.
    magnitude = np.abs(
        librosa.core.stft(y, n_fft=config.fft_size,
                          hop_length=config.hop_size))
    linearspec = np.transpose(magnitude)

    return linearspec, y
github supikiti / PNCC / pncc.py View on Github external
def pncc(audio_wave, n_fft=512, sr=16000, winlen=0.020, winstep=0.010,
         n_mels=128, n_pncc=13, weight_N=4, power=2):
    """Front-end stages of a PNCC feature computation for *audio_wave*.

    NOTE(review): this excerpt stops before any ``return`` statement, so the
    return value and the use of ``n_pncc`` and ``weight_N`` cannot be
    documented from here.

    Visible steps: pre-emphasis filter, down-mix via ``to_mono``, STFT
    magnitude raised to ``power``, projection through a mel filter bank,
    then ``medium_time_power_calculation`` and subtraction of the envelope
    returned by ``asymmetric_lawpass_filtering``.

    Args:
        audio_wave: input signal handed to ``scipy.signal.lfilter``.
        n_fft: FFT size for the STFT and the mel filter bank.
        sr: sample rate; multiplied with ``winlen`` / ``winstep`` to obtain
            the window and hop lengths in samples.
        winlen: window length (presumably in seconds -- confirm).
        winstep: hop between windows (presumably in seconds -- confirm).
        n_mels: number of mel bands in the filter bank.
        n_pncc: not used in the visible part of the function.
        weight_N: not used in the visible part of the function.
        power: exponent applied to both the STFT magnitude and the mel
            filter weights.
    """

    # First-order pre-emphasis FIR filter with coefficients [1.0, -0.97].
    pre_emphasis_signal = scipy.signal.lfilter([1.0, -0.97], 1, audio_wave)
    mono_wave = to_mono(pre_emphasis_signal.T)
    # An all-ones window of the analysis length, i.e. no tapering.
    stft_pre_emphasis_signal = np.abs(stft(mono_wave,
                                           n_fft=n_fft,
                                           hop_length=int(sr * winstep),
                                           win_length=int(sr * winlen),
                                           window=np.ones(int(sr * winlen)),
                                           center=False)) ** power

    # NOTE(review): `sr` is passed positionally here; if `filters` is
    # librosa.filters, recent librosa releases require `sr=sr` -- confirm.
    mel_filter = np.abs(filters.mel(sr, n_fft=n_fft, n_mels=n_mels)) ** power
    # Both operands are transposed before the matrix product.
    power_stft_signal = np.dot(stft_pre_emphasis_signal.T, mel_filter.T)

    medium_time_power = medium_time_power_calculation(power_stft_signal)

    lower_envelope = asymmetric_lawpass_filtering(
        medium_time_power, 0.999, 0.5)

    subtracted_lower_envelope = medium_time_power - lower_envelope
github bjfu-ai-institute / speaker-recognition-papers / pyasv / speech.py View on Github external
def get_mag(url):
            """Return the magnitude spectrogram of the audio found at *url*.

            ``sample_rate``, ``n_fft``, ``hop_length``, ``use_log`` and
            ``db_func`` are not parameters; they are read from the enclosing
            scope.  The window length equals ``n_fft``.  When ``use_log`` is
            truthy the magnitude is passed through ``db_func`` before being
            returned.
            """
            waveform, _sr = librosa.load(url, sr=sample_rate)
            magnitude = np.abs(
                librosa.core.stft(waveform, n_fft=n_fft, win_length=n_fft, hop_length=hop_length)
            )
            if use_log:
                return db_func(magnitude)
            return magnitude
        def get_label(data):