How to use the librosa.effects module in librosa

To help you get started, we’ve selected a few librosa.effects examples, based on popular ways it is used in public projects.
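
Before the project snippets below, here is a minimal, self-contained sketch of the most common librosa.effects calls (the audio path is a placeholder, and keyword arguments are used so the calls work across librosa versions):

import librosa

# load a clip at its native sample rate ("example.wav" is a placeholder path)
y, sr = librosa.load("example.wav", sr=None)

# trim leading and trailing silence; also returns the [start, end] sample indices
y_trimmed, (start, end) = librosa.effects.trim(y, top_db=20)

# split the signal into non-silent intervals, one (start, end) pair per row
intervals = librosa.effects.split(y, top_db=30)

# speed up by 20% without changing pitch
y_fast = librosa.effects.time_stretch(y, rate=1.2)

# shift up by two semitones without changing duration
y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=2)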


github sony / nnabla-examples / speech-synthesis / WaveNet / dataset.py View on Github external
def _get_data(self, position):
    index = self._indexes[position]

    flac_path, speaker, _ = self._data_list[index]

    # all values of data are between [-1, 1]
    data, sr = sf.read(flac_path)

    data, _ = librosa.effects.trim(data, top_db=20)

    # pad or crop to self.duration samples
    if len(data) < self.duration:
        lack = self.duration - len(data)
        before = lack // 2
        after = lack // 2 + lack % 2
        clipped = np.pad(data, pad_width=(before, after), mode="constant")

    else:
        start = np.random.randint(0, len(data) - self.duration)
        clipped = data[start:start + self.duration]

    # shape of clipped == (T,)

    quantized = mu_law_encode(clipped)
github CSTR-Edinburgh / ophelia / script / split_speech.py View on Github external
    wav, fs = soundfile.read(wav_path)  ## TODO: assert mono
    pad = int(pad_sec * fs)
    end_pad = int(end_pad_sec * fs)
    # print pad
    base = get_basename(wav_path)
    # print base
    _, (start, end) = librosa.effects.trim(wav, top_db=top_db)
    start = max(0, (start - end_pad))
    end = min(len(wav), (end + end_pad))
    wav = wav[start:end]
    if trimonly:
        ofile = os.path.join(out_dir, base + '.wav')
        soundfile.write(ofile, wav, fs)
    else:
        starts_ends = librosa.effects.split(wav, top_db=top_db)
        starts_ends[:,0] -= pad
        starts_ends[:,1] += pad
        starts_ends = np.clip(starts_ends, 0, wav.size)
        lengths = starts_ends[:,1] - starts_ends[:,0]
        starts_ends = starts_ends[lengths > fs * minimum_duration_sec]


        for (i, (s,e)) in enumerate(starts_ends):

            ofile = os.path.join(out_dir, base + '_seg%s.wav'%(str(i+1).zfill(4)))
            # print ofile
            soundfile.write(ofile, wav[s:e], fs)
github willfrey / audio / torchaudio / transforms.py View on Github external
def __call__(self, y):
    return librosa.effects.pitch_shift(y, **self.__dict__)
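
The wrapper above forwards every attribute on the transform to pitch_shift via **self.__dict__. A hypothetical sketch of how such a transform might be constructed and applied (the constructor below is an assumption for illustration, not the project's actual code):

import librosa

class PitchShift:
    """Hypothetical transform: stores pitch_shift's keyword arguments as attributes."""
    def __init__(self, sr, n_steps):
        self.sr = sr            # sample rate passed through to pitch_shift
        self.n_steps = n_steps  # shift in (fractional) semitones

    def __call__(self, y):
        # same call pattern as the snippet above
        return librosa.effects.pitch_shift(y, **self.__dict__)

# usage (placeholder path): shift a waveform up two semitones
# y, sr = librosa.load("example.wav", sr=None)
# y_shifted = PitchShift(sr=sr, n_steps=2)(y)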
github corticph / MSTmodel / EndToEndClassification / Dataset / esc50_processor.py View on Github external
        audio (np.array): audio segment.
        sample_rate (int): sample rate.

    Returns:
        (np.array): 'augmented' audio segment.
    """

    limits = ((0, 0), (1.0, 1.0))  # pitch shift in half-steps, time stretch

    pitch_shift = np.random.randint(limits[0][0], limits[0][1] + 1)  # sample within the configured half-step limits (inclusive)
    time_stretch = np.random.random() * (limits[1][1] - limits[1][0]) + limits[1][0]
    time_shift = np.random.randint(sample_rate)

    augmented_audio = np.hstack((np.zeros((time_shift)),
                                 librosa.effects.time_stretch(
                                     librosa.effects.pitch_shift(audio, sample_rate, pitch_shift),
                                     time_stretch)))

    return augmented_audio
github Yangyangii / DeepConvolutionalTTS-pytorch / utils.py View on Github external
    mag = mag.T

    # de-normalize
    mag = (np.clip(mag, 0, 1) * args.max_db) - args.max_db + args.ref_db

    # to amplitude
    mag = np.power(10.0, mag * 0.05)

    # wav reconstruction
    wav = griffin_lim(mag**args.power)

    # de-preemphasis
    wav = signal.lfilter([1], [1, -args.preemph], wav)

    # trim
    wav, _ = librosa.effects.trim(wav)

    return wav.astype(np.float32)
github NVIDIA / OpenSeq2Seq / open_seq2seq / data / text2speech / speech_utils.py View on Github external
  # load audio signal
  signal, fs = librosa.core.load(filename, sr=None)
  if hop_length is None:
    hop_length = int(n_fft / 4)
  if trim:
    signal, _ = librosa.effects.trim(
        signal,
        frame_length=int(n_fft/2),
        hop_length=int(hop_length/2)
    )

  if augmentation is not None:
    if 'pitch_shift_steps' in augmentation:
      pitch_shift_steps = (2.0 * augmentation['pitch_shift_steps'] * \
          np.random.rand()) - augmentation['pitch_shift_steps']
      signal = librosa.effects.pitch_shift(signal, fs, pitch_shift_steps)

    if augmentation['time_stretch_ratio'] > 0:
      # time stretch
      stretch_amount = 1.0 + (2.0 * np.random.rand() - 1.0) * \
          augmentation['time_stretch_ratio']
      signal = rs.resample(
          signal,
          fs,
          int(fs * stretch_amount),
          filter='kaiser_fast',
      )

      # noise
      noise_level_db = np.random.randint(
          low=augmentation['noise_level_min'],
          high=augmentation['noise_level_max']
github qlemaire22 / speech-music-detection / smd / data / data_augmentation / pitch_time.py View on Github external
def time_stretching_audio(audio, rate=None):
    if rate is None:
        rate = random.uniform(config.STRETCHING_MIN, config.STRETCHING_MAX)
    return librosa.effects.time_stretch(audio, rate), rate
github csteinmetz1 / MixCNN / pre_process.py View on Github external
                    stretch_left = librosa.effects.time_stretch(y_left, factor)
                    stretch_right = librosa.effects.time_stretch(y_right, factor)
                stretch = np.stack((stretch_left, stretch_right), axis=0)
                #stretch = np.reshape(stretch, (stretch.shape[1], stretch.shape[0]))
                filename = "{}.wav".format(stem_class)
                librosa.output.write_wav(os.path.join(song, "augmented", subdir, filename), stretch, sr)
                sys.stdout.write(" Stretching by {: >4}     \r".format(factor))
                sys.stdout.flush()

            for semitones in [0.5]: #[-1, -0.5, 0.5, 1]:
                subdir = "shift_{}".format(semitones)
                if not os.path.isdir(os.path.join(song, "augmented", subdir)):
                    os.makedirs(os.path.join(song, "augmented", subdir))
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=FutureWarning)
                    shift_left = librosa.effects.pitch_shift(y_left, sr, n_steps=semitones)
                    shift_right = librosa.effects.pitch_shift(y_right, sr, n_steps=semitones)
                shift = np.stack((shift_left, shift_right), axis=0)
                #shift = np.reshape(shift, (shift.shape[1], shift.shape[0]))
                filename = "{}.wav".format(stem_class)
                librosa.output.write_wav(os.path.join(song, "augmented", subdir, filename), shift, sr)
                sys.stdout.write(" Shifting by {: >2}      \r".format(semitones))
                sys.stdout.flush()
github Kyubyong / deepvoice3 / utils.py View on Github external
    # de-normalize
    mag = (np.clip(mag, 0, 1) * hp.max_db) - hp.max_db + hp.ref_db

    # to amplitude
    mag = librosa.db_to_amplitude(mag)
    # print(np.max(mag), np.min(mag), mag.shape)
    # (1025, 812, 16)

    # wav reconstruction
    wav = griffin_lim(mag)

    # de-preemphasis
    wav = signal.lfilter([1], [1, -hp.preemphasis], wav)

    # trim
    wav, _ = librosa.effects.trim(wav)

    return wav