How to use the librosa.core module in librosa

To help you get started, we’ve selected a few librosa.core examples, based on popular ways it is used in public projects.

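If you just need a starting point, here is a minimal sketch (not taken from any of the projects below; example.wav is a placeholder file name) that loads audio and computes an STFT magnitude spectrogram with librosa.core:

import numpy as np
import librosa

# load the audio as mono float32, keeping the file's native sampling rate
y, sr = librosa.core.load('example.wav', sr=None)

# short-time Fourier transform; keep only the magnitude
S = np.abs(librosa.core.stft(y, n_fft=2048, hop_length=512))
print(S.shape, sr)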

github dodo1210 / tcc_code / elementos / spectral_contrast / spectral_contrast.py View on Github external
import librosa
import numpy as np

# 2. Load the audio as a waveform `y`
#    Store the sampling rate as `sr`
# read the song list
arq = open('/home/douglas/Música/musicas/wav/tristes/tristes.txt','r')
lines = arq.readlines()
arq.close()

lista = []

count=0
for l in lines:
    # load each audio file
    music, erro = l.split("\n",1)
    # PLEASE CHECK THE PATH
    y, sr = librosa.load('/home/douglas/Música/musicas/wav/tristes/'+music,sr=44100)
    S = np.abs(librosa.core.stft(y, n_fft=2048, hop_length=512, win_length=1024, window='hann'))
    contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
    print(music,contrast.mean())
    lista.append(contrast.mean())

arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','r')
musics = arq.readlines()
arq.close()


count=0
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','w')
for m in musics:
    music, erro = m.split("\n",1)
    arq.write(music+","+str(lista[count])+"\n")
    count+=1
github castorini / honk / keyword_spotting_data_generator / evaluation / utils / youtube_crawler.py View on Github external
def get_audio(self):
    temp_file_name = "temp_" + self.url.replace('_', '-')
    self.video.streams.first().download(filename=temp_file_name)

    if not os.path.isfile(temp_file_name + ".mp4"):
        raise Exception("crawled file is not in format of mp4")

    cmd = FFMPEG_TEMPLATE.format(temp_file_name).split()
    subprocess.check_output(cmd)

    # load the extracted wav as mono float32, resampled to 16 kHz
    audio_data = librosa.core.load(temp_file_name + ".wav", 16000)[0]

    os.remove(temp_file_name + ".mp4")
    os.remove(temp_file_name + ".wav")

    return audio_data
github sonidosmutantes / apicultor / module / machine_learning / SoundSimilarity.py View on Github external
for s in list(os.walk(files_dir+'/centroid', topdown=False))[-1][-1]:
    if str(t[0]).split('.')[0] == s.split('centroid.ogg')[0]:
        shutil.copy(files_dir+'/centroid/'+s, files_dir+'/centroid/'+str(c)+'/'+s)
        print(t)

try:
    simil_audio = [MonoLoader(filename=files_dir+'/centroid/'+str(c)+f)() for f in list(os.walk(files_dir+'/centroid/'+str(c), topdown=False))[-1][-1]]
    audio0 = scratch_music(choice(simil_audio))
    audio1 = scratch_music(choice(simil_audio))
    del simil_audio
    audio_N = min([len(i) for i in [audio0, audio1]])
    audio_samples = [i[:audio_N]/i.max() for i in [audio0, audio1]]
    simil_x = np.array(audio_samples).sum(axis=0)
    del audio_samples
    simil_x = 0.5*simil_x/simil_x.max()
    # split the STFT into harmonic and percussive components; keep the harmonic part
    h, p = librosa.decompose.hpss(librosa.core.stft(simil_x))
    del simil_x, p
    h = librosa.istft(h)
    MonoWriter(filename=files_dir+'/centroid/'+str(c)+'/remix/'+'similarity_mix_centroid.ogg', format='ogg', sampleRate=44100)(h)
    del h
except Exception as e:
    print(e)
    continue
github marl / massage / massage / resynth / util.py View on Github external
    win_length : int default=4096
        window size for doing stft analysis
    theta : float default=0.15
        bias on the smoothed signal in the context of logistic function
        higher theta reduces envelope activation sensitivity
        lower theta increases envelope activation sensitivity

    Returns
    -------
    y_env : ndarray
        a vector specifying the amplitude envelope
    """
    S = librosa.stft(
        y_input, n_fft=win_length, hop_length=win_length,
        win_length=win_length)
    S_samples = librosa.core.frames_to_samples(
        range(len(S[0])), hop_length=win_length)
    y_smooth = np.mean(np.abs(S), axis=0)

    # normalization (to overall energy)
    if np.max(np.abs(y_smooth)) > 0:
        y_smooth = y_smooth / np.max(np.abs(y_smooth))

    # binary thresholding for low overall energy events
    y_smooth[y_smooth < thresh] = 0

    # LP filter
    b_coeff, a_coeff = scipy.signal.butter(2, lpf_cutoff, 'low')
    y_smooth = scipy.signal.filtfilt(b_coeff, a_coeff, y_smooth)

    # logistic function to semi-binarize the output; confidence value
    y_conf = 1.0 - (1.0 / (1.0 + np.exp(np.dot(alpha, (y_smooth - theta)))))
github Veleslavia / vimss / Utils.py View on Github external
def concat_and_upload(estimates_path, gsc_estimates_path, sr=22050):

    for root, dirs, files in os.walk(estimates_path):
        if not files:
            continue
        files.sort()
        audio_data = np.concatenate([librosa.core.load(os.path.join(root, name))[0] for name in files])
        librosa.output.write_wav(root+'.wav', audio_data, sr)
        for name in files:
            os.remove(os.path.join(root, name))
        os.rmdir(root)
        #upload_to_gcs([root+'.wav'], gsc_estimates_path)
github ciaua / InstrumentPlayingDetection / scripts / AudioSet / extract_melspec.target_time.py View on Github external
    video_fp = os.path.join(video_dir, '{}.mp4'.format(youtube_id))

    # File paths
    out_audio_fp = os.path.join(out_audio_dir, '{}.mp3'.format(youtube_id))
    out_audio_feat_fp = os.path.join(out_audio_feat_dir,
                                     '{}.npy'.format(youtube_id))
    if os.path.exists(out_audio_feat_fp):
        print('Done before: {}'.format(video_fp))
        return

    # Extract audio
    get_audio_from_video(video_fp, out_audio_fp, sr)

    # Extract feature
    try:
        duration = librosa.core.get_duration(filename=out_audio_fp)
        if duration < time_range[1]:
            print('Audio too short: {}'.format(video_fp))
            return
        sig, sr = librosa.core.load(out_audio_fp, sr=sr,
                                    offset=time_range[0],
                                    duration=time_range[1]-time_range[0])
        feat_ = librosa.feature.melspectrogram(sig, sr=sr,
                                               n_fft=win_size,
                                               hop_length=hop_size,
                                               n_mels=num_mels).T
        feat = np.log(1+10000*feat_)
        np.save(out_audio_feat_fp, feat)
        print('Done: {} -- {}'.format(video_fp, youtube_id))

    except Exception as e:
        print('Exception in extracting feature: {}. {}'.format(video_fp, e))
github castorini / honkling / preprocessing.py View on Github external
def preprocess_audio(data, config):
    amp_spectrum = librosa.core.stft(data, n_fft=config["n_fft"], hop_length=config["hop_length"], pad_mode='constant')
    print_data('amp_spectrum data', amp_spectrum)

    # np.abs(D[f, t]) is the magnitude of frequency bin f at frame t
    power_spectrum = np.abs(amp_spectrum)**2
    print_data('power spectrogram data', power_spectrum)

    # corresponding librosa operations
    # in order to use pad mode = 'constant' for stft, melspectrogram must be computed manually as in this preprocessing script
    # default pad_mode for stft is reflection padding
    # S, _ = librosa.spectrum._spectrogram(y=data, n_fft=config["n_fft"], hop_length=config["hop_length"],
    #                         power=2)
    # print_data('power spectrogram generated through _spectrogram', S)

    mel_basis = librosa.filters.mel(sample_rate, n_fft=config["n_fft"], n_mels=config["n_mels"], fmin=config["fmin"], fmax=config["fmax"])
    print_data('mel_basis', mel_basis)
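The snippet stops after building the filterbank; a hedged continuation (an assumption, not part of the original script) would project the power spectrogram onto the mel basis:

    # assumed continuation: apply the mel filterbank to the power spectrogram
    mel_spectrum = np.dot(mel_basis, power_spectrum)
    print_data('mel spectrogram data', mel_spectrum)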
github emilio-molina / audio_degrader / audio_degrader / AudioFile.py View on Github external
def _create_tmp_mirror_file(self):
    out, err, returncode = run(
        'ffmpeg -y -i {0} -ac 2 -acodec pcm_f32le {1}'.format(
            self.audio_path, self.tmp_path))
    # keep the original sample rate and channel layout (no resampling, no downmix)
    self.samples, self.sample_rate = lr.core.load(self.tmp_path,
                                                  sr=None, mono=False)
    logging.debug(out)
    logging.debug(err)
github tensorflow / magenta / magenta / models / onsets_frames_transcription / data.py View on Github external
def _wav_to_cqt(wav_audio, hparams):
  """Transforms the contents of a wav file into a series of CQT frames."""
  y = audio_io.wav_data_to_samples(wav_audio, hparams.sample_rate)

  cqt = np.abs(
      librosa.core.cqt(
          y,
          hparams.sample_rate,
          hop_length=hparams.spec_hop_length,
          fmin=hparams.spec_fmin,
          n_bins=hparams.spec_n_bins,
          bins_per_octave=hparams.cqt_bins_per_octave),
      dtype=np.float32)

  # Transpose so that the data is in [frame, bins] format.
  cqt = cqt.T
  return cqt
github sekiguchi92 / SpeechEnhancement / DeepSpeechPrior / make_dataset_wsj0.py View on Github external
def make_dataset(wsj0_path=WSJ0_PATH, dataset_save_path=DATASET_SAVE_PATH):
    dataset_fileName = dataset_save_path + '/wsj0_normalize_{}_{}.pic'.format(N_FFT, HOP_LENGTH)
    if os.path.isfile(dataset_fileName):
        print(dataset_fileName, " already exist. Skip this phase.")
        return 0

    print("Start making dataset ...")
    dataset = []
    for fileName in progressbar(glob.glob(wsj0_path + "/*.wav")):
        wav, _ = snd.read(fileName)
        pwr_spec_FT = np.abs(librosa.core.stft(wav, n_fft=args.n_fft, hop_length=args.hop_length)) ** 2
        vad_result = vad(pwr_spec_FT)
        pwr_spec_FT /= (pwr_spec_FT.sum(axis=0)[vad_result]).mean()
        dataset.append(np.array(pwr_spec_FT, dtype=np.float32))

    dataset = np.hstack(dataset)

    print("Writing to pickle file ...")
    pic.dump(dataset, open(dataset_fileName, 'wb'), protocol=4)