Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _get_data(self, position):
# Build one example for `position`: look up the recording, trim silence,
# force the waveform to `self.duration` samples, then mu-law encode it.
# NOTE(review): this snippet has lost its indentation and part of its body
# (see the notes below) -- confirm every branch against the original file.
# Translate the dataset position into an entry of the data list.
index = self._indexes[position]
flac_path, speaker, _ = self._data_list[index]
# all values of data are between [-1, 1]
# NOTE(review): the right-hand side of this assignment is missing in SOURCE;
# presumably it loads audio from `flac_path` -- TODO confirm.
data, sr =
# Strip leading and trailing silence using a 20 dB threshold.
data, _ = librosa.effects.trim(data, top_db=20)
# clip
# Short recording: pad both sides so the result has `self.duration` samples.
if len(data) < self.duration:
lack = self.duration - len(data)
# Split the shortfall evenly; an odd leftover sample goes to the end.
before = lack // 2
after = lack // 2 + lack % 2
clipped = np.pad(data, pad_width=(before, after), mode="constant")
# Otherwise take a random window of `self.duration` samples.
# NOTE(review): no `else:` is visible here; it was presumably lost with the
# indentation. Also, `np.random.randint(0, 0)` raises ValueError, so a
# recording of exactly `self.duration` samples looks unhandled -- confirm.
start = np.random.randint(0, len(data) - self.duration)
clipped = data[start:start + self.duration]
# shape of clipped == (T,)
quantized = mu_law_encode(clipped)
# NOTE(review): no return statement is visible in SOURCE for this function.
wav, fs = ## TODO: assert mono
pad = int(pad_sec * fs)
end_pad = int(end_pad_sec * fs)
# print pad
base = get_basename(wav_path)
# print base
_, (start, end) = librosa.effects.trim(wav, top_db=top_db)
start = max(0, (start - end_pad))
end = min(len(wav), (end + end_pad))
wav = wav[start:end]
if trimonly:
ofile = os.path.join(out_dir, base + '.wav')
soundfile.write(ofile, wav, fs)
starts_ends = librosa.effects.split(wav, top_db=top_db)
starts_ends[:,0] -= pad
starts_ends[:,1] += pad
starts_ends = np.clip(starts_ends, 0, wav.size)
lengths = starts_ends[:,1] - starts_ends[:,0]
starts_ends = starts_ends[lengths > fs * minimum_duration_sec]
for (i, (s,e)) in enumerate(starts_ends):
ofile = os.path.join(out_dir, base + '_seg%s.wav'%(str(i+1).zfill(4)))
# print ofile
soundfile.write(ofile, wav[s:e], fs)
def __call__(self, y):
    """Apply a pitch shift to ``y``.

    Every attribute stored on the instance is forwarded as a keyword
    argument to ``librosa.effects.pitch_shift``.

    Args:
        y: Input handed to ``librosa.effects.pitch_shift`` as its first
            positional argument.

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for ``y``.
    """
    shift_kwargs = self.__dict__
    return librosa.effects.pitch_shift(y, **shift_kwargs)
audio (np.array): audio segment.
sample_rate (int): sample rate.
(np.array): 'augmented' audio segment.
limits = ((0, 0), (1.0, 1.0)) # pitch shift in half-steps, time stretch
pitch_shift = np.random.randint(limits[0][1], limits[0][1] + 1)
time_stretch = np.random.random() * (limits[1][1] - limits[1][0]) + limits[1][0]
time_shift = np.random.randint(sample_rate)
augmented_audio = np.hstack((np.zeros((time_shift)),
librosa.effects.pitch_shift(audio, sample_rate, pitch_shift),
return augmented_audio
mag = mag.T
# de-normalize
mag = (np.clip(mag, 0, 1) * args.max_db) - args.max_db + args.ref_db
# to amplitude
mag = np.power(10.0, mag * 0.05)
# wav reconstruction
wav = griffin_lim(mag**args.power)
# de-preemphasis
wav = signal.lfilter([1], [1, -args.preemph], wav)
# trim
wav, _ = librosa.effects.trim(wav)
return wav.astype(np.float32)
# load audio signal
signal, fs = librosa.core.load(filename, sr=None)
if hop_length is None:
hop_length = int(n_fft / 4)
if trim:
signal, _ = librosa.effects.trim(
if augmentation is not None:
if 'pitch_shift_steps' in augmentation:
pitch_shift_steps = (2.0 * augmentation['pitch_shift_steps'] * \
np.random.rand()) - augmentation['pitch_shift_steps']
signal = librosa.effects.pitch_shift(signal, fs, pitch_shift_steps)
if augmentation['time_stretch_ratio'] > 0:
# time stretch
stretch_amount = 1.0 + (2.0 * np.random.rand() - 1.0) * \
signal = rs.resample(
int(fs * stretch_amount),
# noise
noise_level_db = np.random.randint(
def time_stretching_audio(audio, rate=None):
    """Time-stretch ``audio`` and report the rate that was applied.

    Args:
        audio: Audio passed to ``librosa.effects.time_stretch``.
        rate: Stretch factor. When ``None``, a factor is drawn uniformly
            from ``config.STRETCHING_MIN`` to ``config.STRETCHING_MAX``.

    Returns:
        A ``(stretched_audio, rate)`` tuple, where ``rate`` is the factor
        that was actually used (given or randomly drawn).
    """
    if rate is None:
        rate = random.uniform(config.STRETCHING_MIN, config.STRETCHING_MAX)
    # `rate` is keyword-only in librosa >= 0.10; the keyword spelling is
    # accepted by older releases as well, so this call works on both.
    stretched = librosa.effects.time_stretch(audio, rate=rate)
    return stretched, rate
stretch_left = librosa.effects.time_stretch(y_left, factor)
stretch_right = librosa.effects.time_stretch(y_right, factor)
stretch = np.stack((stretch_left, stretch_right), axis=0)
#stretch = np.reshape(stretch, (stretch.shape[1], stretch.shape[0]))
filename = "{}.wav".format(stem_class)
librosa.output.write_wav(os.path.join(song, "augmented", subdir, filename), stretch, sr)
sys.stdout.write(" Stretching by {: >4} \r".format(factor))
for semitones in [0.5]: #[-1, -0.5, 0.5, 1]:
subdir = "shift_{}".format(semitones)
if not os.path.isdir(os.path.join(song, "augmented", subdir)):
os.makedirs(os.path.join(song, "augmented", subdir))
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
shift_left = librosa.effects.pitch_shift(y_left, sr, n_steps=semitones)
shift_right = librosa.effects.pitch_shift(y_right, sr, n_steps=semitones)
shift = np.stack((shift_left, shift_right), axis=0)
#shift = np.reshape(shift, (shift.shape[1], shift.shape[0]))
filename = "{}.wav".format(stem_class)
librosa.output.write_wav(os.path.join(song, "augmented", subdir, filename), shift, sr)
sys.stdout.write(" Shifting by {: >2} \r".format(semitones))
# de-normalize
mag = (np.clip(mag, 0, 1) * hp.max_db) - hp.max_db + hp.ref_db
# to amplitude
mag = librosa.db_to_amplitude(mag)
# print(np.max(mag), np.min(mag), mag.shape)
# (1025, 812, 16)
# wav reconstruction
wav = griffin_lim(mag)
# de-preemphasis
wav = signal.lfilter([1], [1, -hp.preemphasis], wav)
# trim
wav, _ = librosa.effects.trim(wav)
return wav