# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): truncated fragment. The five lines below appear to be the
# tail of a resampled_silence.append(pra.highpass(resample(...), fs, fc=150.))
# call (compare the intact copy at the matching block later in this file)
# whose opening lines are not visible in this chunk. The `else:` further down
# belongs to an out-of-view `if fs_file != fs:` test. Kept byte-identical.
resample(rec_silence[:, i], fs / fs_file, 'sinc_best'),
fs,
fc=150.
)
)
# NOTE(review): np.float was removed in NumPy 1.20+ — would need plain float.
speech_signals = np.array(resampled_signals, dtype=np.float).T
silence = np.array(resampled_silence, dtype=np.float).T
else:
# No-resampling branch: select the chosen channels directly from the file.
print('No need to resample signals')
speech_signals = np.array(rec_signals[:, R_flat_I], dtype=np.float32)
silence = np.array(rec_silence[:, R_flat_I], dtype=np.float32)
# High-pass filter both recordings at 150 Hz (in place, channel by channel)
# to remove low-frequency rumble before any further processing.
for s in speech_signals.T:
    s[:] = pra.highpass(s, fs, fc=150.)
for s in silence.T:
    s[:] = pra.highpass(s, fs, fc=150.)

# Normalize the amplitude so the speech peaks at 0.95; the same factor is
# applied to the silence recording so the relative levels are preserved.
n_factor = 0.95 / np.max(np.abs(speech_signals))
speech_signals *= n_factor
silence *= n_factor

# Analysis window for the STFT
win_stft = np.hanning(fft_size)

# Estimate the noise floor from the silence recording, one microphone at a
# time. The 1/sqrt(fft_size) factor keeps the transform energy-normalized.
y_noise_stft = []
for k in range(num_mic):
    y_stft = pra.stft(silence[:, k], fft_size, frame_shift_step,
                      transform=rfft, win=win_stft).T / np.sqrt(fft_size)
    # NOTE(review): this append was missing in the original (truncated
    # paste); restored to match the identical loop later in this file.
    y_noise_stft.append(y_stft)
# Resample the recordings if the file's sample rate differs from the target
# rate `fs`; each selected channel is resampled and then high-passed at 150 Hz.
if fs_file != fs:
    print('Resampling signals')  # fixed: was a Python 2 print statement
    from scikits.samplerate import resample
    resampled_signals = []
    resampled_silence = []
    for i in R_flat_I:
        resampled_signals.append(
            pra.highpass(
                resample(rec_signals[:, i], fs / fs_file, 'sinc_best'),
                fs,
                fc=150.
            )
        )
        resampled_silence.append(
            pra.highpass(
                resample(rec_silence[:, i], fs / fs_file, 'sinc_best'),
                fs,
                fc=150.
            )
        )
    # fixed: np.float was removed in NumPy 1.20+; plain float is equivalent.
    speech_signals = np.array(resampled_signals, dtype=float).T
    silence = np.array(resampled_silence, dtype=float).T
else:
    print('No need to resample signals')
    speech_signals = np.array(rec_signals[:, R_flat_I], dtype=np.float32)
    silence = np.array(rec_silence[:, R_flat_I], dtype=np.float32)

# highpass filter at 150 Hz (in place, channel by channel)
for s in speech_signals.T:
    s[:] = pra.highpass(s, fs, fc=150.)
# NOTE(review): the two section-header docstrings below ("Rake MVDR
# simulation" and "Rake Perceptual simulation") were each missing their
# opening ''' in the original, leaving bare words as statements
# (SyntaxError). The delimiters are restored here; the commented-out code
# kept inside the triple-quoted blocks is reproduced byte-identically.
'''
# compute beamforming filters
mics = pra.Beamformer(R, Fs, N=N, Lg=Lg)
room1.add_microphone_array(mics)
room1.compute_rir()
room1.simulate()
mics.rake_mvdr_filters(room1.sources[0][0:1],
room1.sources[1][0:1],
sigma2_n*np.eye(mics.Lg*mics.M), delay=delay)
# process the signal
output = mics.process()
# save to output file
input_mic = pra.normalize(pra.highpass(mics.signals[mics.M//2], Fs))
wavfile.write(path + '/output_samples/input.wav', Fs, input_mic)
out_DirectMVDR = pra.normalize(pra.highpass(output, Fs))
wavfile.write(path + '/output_samples/output_DirectMVDR.wav', Fs, out_DirectMVDR)
'''

'''
Rake MVDR simulation
'''
# Add the microphone array and compute RIR
mics = pra.Beamformer(R, Fs, N, Lg=Lg)
room1.add_microphone_array(mics)
room1.compute_rir()
room1.simulate()

'''
# compute beamforming filters
mics = pra.Beamformer(R, Fs, N, Lg=Lg)
room1.add_microphone_array(mics)
room1.compute_rir()
room1.simulate()
mics.rake_perceptual_filters(room1.sources[0][0:1],
room1.sources[1][0:1],
sigma2_n*np.eye(mics.Lg*mics.M), delay=delay)
# process the signal
output = mics.process()
# save to output file
out_DirectPerceptual = pra.normalize(pra.highpass(output, Fs))
wavfile.write(path + '/output_samples/output_DirectPerceptual.wav', Fs, out_DirectPerceptual)
'''

'''
Rake Perceptual simulation
'''
# compute beamforming filters
# Build a beamformer on the array geometry R, attach it to the room,
# compute the room impulse responses and run the simulation before
# designing the Rake Perceptual filters.
mics = pra.Beamformer(R, Fs, N, Lg=Lg)
room1.add_microphone_array(mics)
room1.compute_rir()
room1.simulate()
# NOTE(review): presumably good_sources = desired image sources and
# bad_sources = interferer image sources, with sigma2_n*I as the noise
# covariance — confirm against the pyroomacoustics Beamformer docs.
mics.rake_perceptual_filters(good_sources,
bad_sources,
sigma2_n*np.eye(mics.Lg*mics.M), delay=delay)
# process the signal
# number of sources
K = len(sources)

# Import the recorded speech signal
fs_file, rec_signals = wavfile.read(filename)

# sanity check: the recording must already be at the expected rate
if pmt['fs'] != fs_file:
    # BUG FIX: the original used '' inside a single-quoted string, which is
    # implicit concatenation and produced "doesnt" instead of "doesn't".
    raise ValueError("The sampling frequency of the files doesn't match that of the script")

speech_signals = np.array(rec_signals[:, pmt['mic_select']], dtype=np.float32)

# Remove the DC bias with a 100 Hz highpass, channel by channel, in place
for s in speech_signals.T:
    s[:] = pra.highpass(s, pmt['fs'], 100.)

# STFT analysis window (None falls back to a rectangular window)
if pmt['stft_win']:
    stft_win = np.hanning(pmt['nfft'])
else:
    stft_win = None

# Normalize the amplitude
speech_signals *= pmt['scaling']

# Compute STFT of signal
# -------------------------
# One (frames x bins) spectrogram per microphone; the 1/sqrt(nfft) factor
# keeps the transform energy-normalized.
y_mic_stft = []
for k in range(speech_signals.shape[1]):
    y_stft = pra.stft(speech_signals[:, k], pmt['nfft'], pmt['stft_hop'],
                      transform=rfft, win=stft_win).T / np.sqrt(pmt['nfft'])
    y_mic_stft.append(y_stft)
# NOTE(review): the original contained the resampling branch duplicated and
# nested inside its own else: clause (plus a dangling "fc=150.) )" fragment);
# deduplicated here into the single intended pipeline.
# Load the recorded speech and the matching silence (noise-only) segment.
fs_file, rec_signals = wavfile.read(filename)
fs_silence, rec_silence = wavfile.read(rec_folder + 'silence.wav')

if fs_file != fs_silence:
    raise ValueError('Weird: fs of signals and silence are different...')

# Resample the files if required
if fs_file != fs:
    print('Resampling signals')  # fixed: was a Python 2 print statement
    from scikits.samplerate import resample
    resampled_signals = []
    resampled_silence = []
    for i in R_flat_I_subset:
        resampled_signals.append(
            pra.highpass(
                resample(rec_signals[:, i], fs / fs_file, 'sinc_best'),
                fs,
                fc=150.
            )
        )
        resampled_silence.append(
            pra.highpass(
                resample(rec_silence[:, i], fs / fs_file, 'sinc_best'),
                fs,
                fc=150.
            )
        )
    # fixed: np.float was removed in NumPy 1.20+; plain float is equivalent.
    speech_signals = np.array(resampled_signals, dtype=float).T
    silence = np.array(resampled_silence, dtype=float).T
else:
    print('No need to resample signals')
    speech_signals = np.array(rec_signals[:, R_flat_I_subset], dtype=np.float32)
    silence = np.array(rec_silence[:, R_flat_I_subset], dtype=np.float32)

# highpass filter at 150 Hz, channel by channel, in place
for s in speech_signals.T:
    s[:] = pra.highpass(s, fs, fc=150.)
for s in silence.T:
    s[:] = pra.highpass(s, fs, fc=150.)

# Normalize the amplitude so the speech peaks at 0.95; the same factor is
# applied to the silence so relative levels are preserved.
n_factor = 0.95 / np.max(np.abs(speech_signals))
speech_signals *= n_factor
silence *= n_factor

# estimate noise floor from the silence recording (mean PSD over all
# microphones, frames and frequency bins)
y_noise_stft = []
for k in range(num_mic):
    y_stft = pra.stft(silence[:, k], fft_size, frame_shift_step,
                      transform=rfft, win=win_stft).T / np.sqrt(fft_size)
    y_noise_stft.append(y_stft)
y_noise_stft = np.array(y_noise_stft)
noise_floor = np.mean(np.abs(y_noise_stft) ** 2)

# estimate SNR in dB (on 1st microphone)
# NOTE(review): the code that computed the SNR is missing here — the
# fragment was truncated after this header comment.
# Directory that holds the input/output sample wav files.
path = os.path.dirname(__file__)

def _read_clean(fname):
    # Load a wav file, cast it to float, normalize the level and remove
    # low-frequency content with pyroomacoustics' default highpass.
    rate, sig = wavfile.read(fname)
    sig = np.array(sig, dtype=float)
    sig = pra.normalize(sig)
    return rate, pra.highpass(sig, Fs)

# The first signal (of interest) is singing
rate1, signal1 = _read_clean(path + '/input_samples/singing_' + str(Fs) + '.wav')
delay1 = 0.

# The second signal (interferer) is some german speech
rate2, signal2 = _read_clean(path + '/input_samples/german_speech_' + str(Fs) + '.wav')
delay2 = 1.

# Create the 2-D shoebox room
room_dim = [4, 6]
room1 = pra.ShoeBox(
    room_dim,
    absorption=absorption,
    fs=Fs,
    t0=t0,
    max_order=max_order_sim,
    sigma2_awgn=sigma2_n)

# Source positions: the desired talker and the interferer
good_source = np.array([1, 4.5])          # good source
normal_interferer = np.array([2.8, 4.3])  # interferer
room1.add_source(good_source, signal=signal1, delay=delay1)