How to use the pyroomacoustics.stft function in pyroomacoustics

To help you get started, we’ve selected a few pyroomacoustics examples, based on popular ways it is used in public projects.
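
All the snippets below share one call pattern: pra.stft(signal, fft_size, hop, transform=..., win=...) returns an array with one row per time frame and, when a real FFT is used, fft_size // 2 + 1 frequency bins per row. Here is a minimal, self-contained sketch of that pattern with placeholder values. Note that pra.stft is the legacy interface these projects rely on; recent pyroomacoustics releases expose the STFT under pra.transform instead.

import numpy as np
from numpy.fft import rfft
import pyroomacoustics as pra

fft_size = 256              # frame length, also the number of FFT points
hop = fft_size // 2         # frame shift (50% overlap)
win = np.hanning(fft_size)  # analysis window

x = np.random.randn(16000)  # placeholder signal: one second at 16 kHz

# rows are time frames, columns are the fft_size // 2 + 1 positive bins
X = pra.stft(x, fft_size, hop, transform=rfft, win=win)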


github LCAV / FRIDA / test_doa_recorded.py View on Github
    # Compute DFT of snapshots
    # -------------------------
    y_mic_stft = []
    for k in range(num_mic):
        y_stft = pra.stft(speech_signals[:, k], fft_size, frame_shift_step,
                          transform=rfft, win=win_stft).T / np.sqrt(fft_size)
        y_mic_stft.append(y_stft)
    y_mic_stft = np.array(y_mic_stft)

    energy_level = np.abs(y_mic_stft) ** 2

    # True direction of arrival
    # -------------------------
    sources = rec_file.split('-')
    phi_ks = np.array([twitters.doa(array_str, sources[k])[0] for k in range(K)])
    phi_ks[phi_ks < 0] = phi_ks[phi_ks < 0] + 2 * np.pi

    # create DOA object
    if algo == 1:
        algo_name = 'SRP-PHAT'
        d = doa.SRP(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c,
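
The listing above is cut off inside the SRP constructor. For context, the stacked array y_mic_stft has shape num_mic x (fft_size // 2 + 1) x n_frames, which is the layout the pyroomacoustics DOA objects consume. A hedged sketch of the call that typically follows (the freq_bins keyword and the variable holding the selected bins are assumptions, chosen elsewhere in the script):

    d.locate_sources(y_mic_stft, freq_bins=freq_bins)
    print(d.azimuth_recon)  # estimated azimuths, in radians
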
github LCAV / FRIDA / test_stft_speech_record.py View on Github
    # algorithm parameters
    stop_cri = 'max_iter'
    fft_size = 256  # number of FFT bins
    n_bands = 6
    M = 15  # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2

    # index of array where the microphones are on the same plane
    array_flat_idx = list(range(8, 16)) + list(range(24, 32)) + list(range(40, 48))
    mic_array_coordinate = R_pyramic[:, array_flat_idx]
    num_mic = mic_array_coordinate.shape[1]  # number of microphones

    frame_shift_step = int(fft_size)  # full frame shift, i.e. no overlap between frames
    y_mic_stft = []
    for mic_count in array_flat_idx:
        y_mic_stft_loop = pra.stft(speech_signals[:, mic_count], fft_size,
                                   frame_shift_step, transform=rfft).T / np.sqrt(fft_size)
        y_mic_stft.append(y_mic_stft_loop)

    y_mic_stft = np.array(y_mic_stft)

    # Subband selection
    # TODO: check why the selected subband frequencies are so 'quantised'
    # bands_pwr = np.mean(np.mean(np.abs(y_mic_stft) ** 2, axis=0), axis=1)
    # fft_bins = np.argsort(bands_pwr)[-n_bands - 1:-1]
    # ======================================
    max_baseline = 0.25  # <== calculated based on array layout
    f_array_tuning = (2.5 * speed_sound) / (0.5 * max_baseline)
    # print(f_array_tuning)
    # freq_hz = np.linspace(200., 5500., n_bands)  #np.array([800, 1100, 1550, 2100, 2300, 2700])  #
    freq_hz = np.linspace(200., 6000., n_bands)  # np.array([800, 1100, 1550, 2100, 2300, 2700])  #
    # freq_hz = np.logspace(np.log10(700.), np.log10(8000.), n_bands)
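
The snippet stops before mapping these candidate frequencies to FFT bin indices. The usual conversion, assuming the sampling rate lives in a variable such as fs (not shown above), would be:

    fft_bins = np.round(freq_hz / fs * fft_size).astype(int)
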
github LCAV / FRIDA / tools / generators.py View on Github
    # Now generate some noise
    # source_signal = np.random.randn(K, Ns * fft_size) * np.sqrt(alpha_ks[:, np.newaxis])
    source_signal = np.random.randn(K, fft_size + (Ns - 1) * frame_shift_step) * \
                    np.sqrt(np.reshape(alpha_ks, (-1, 1), order='F'))

    # Now generate all the microphone signals
    y = np.zeros((num_mic, source_signal.shape[1] + impulse_response.shape[2] - 1), dtype=np.float32)
    for src in range(K):
        for mic in range(num_mic):
            y[mic] += fftconvolve(impulse_response[src, mic], source_signal[src])

    # Now do the short time Fourier transform
    # The resulting signal is M x fft_size/2+1 x number of frames
    y_hat_stft_noiseless = \
        np.array([pra.stft(signal, fft_size, frame_shift_step, transform=mkl_fft.rfft).T
                  for signal in y]) / np.sqrt(fft_size)

    # compute noise variance based on SNR
    signal_energy = linalg.norm(y_hat_stft_noiseless.flatten()) ** 2
    noise_energy = signal_energy / 10 ** (SNR * 0.1)
    sigma2_noise = noise_energy / y_hat_stft_noiseless.size

    # Add noise to the signals
    y_noisy = y + np.sqrt(sigma2_noise) * np.array(np.random.randn(*y.shape), dtype=np.float32)

    y_hat_stft = \
        np.array([pra.stft(signal, fft_size, frame_shift_step, transform=mkl_fft.rfft).T
                  for signal in y_noisy]) / np.sqrt(fft_size)

    return y_hat_stft, y_hat_stft_noiseless
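
The noise variance above comes from inverting the SNR definition: with the SNR in dB, the total noise energy is the signal energy divided by 10^(SNR / 10), spread uniformly over every STFT sample. A tiny self-contained check with invented numbers:

import numpy as np

SNR = 20.                            # target SNR in dB
y_hat = np.random.randn(8, 129, 50)  # placeholder STFT array
signal_energy = np.linalg.norm(y_hat.flatten()) ** 2
noise_energy = signal_energy / 10 ** (SNR * 0.1)
sigma2_noise = noise_energy / y_hat.size  # per-sample noise variance
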
github LCAV / pyroomacoustics / examples / beamforming_time_domain.py View on Github
# compute time-frequency planes
F0 = pra.stft(input_clean, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F1 = pra.stft(input_mic, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F2 = pra.stft(out_DirectMVDR, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F3 = pra.stft(out_RakeMVDR, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F4 = pra.stft(out_DirectPerceptual, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)
F5 = pra.stft(out_RakePerceptual, fft_size, fft_hop, 
          win=analysis_window, 
          zp_back=fft_zp)

# (not so) fancy way to set the scale to avoid having the spectrum
# dominated by a few outliers
p_min = 7
p_max = 100
all_vals = np.concatenate((pra.dB(F1+pra.eps), 
                           pra.dB(F2+pra.eps), 
                           pra.dB(F3+pra.eps),
                           pra.dB(F0+pra.eps),
                           pra.dB(F4+pra.eps),
                           pra.dB(F5+pra.eps))).flatten()
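
The p_min and p_max percentiles above are presumably passed to np.percentile to clip the color scale of the spectrogram plots; the listing stops before that step. A plausible continuation (the matplotlib usage is an assumption, not part of the original):

import matplotlib.pyplot as plt

# clip the dB scale to the [p_min, p_max] percentiles over all plots
vmin, vmax = np.percentile(all_vals, [p_min, p_max])
plt.imshow(pra.dB(F1 + pra.eps).T, origin='lower', aspect='auto',
           vmin=vmin, vmax=vmax)
plt.show()
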
github LCAV / pyroomacoustics / examples / beamforming_time_domain.py View on Github
# remove a bit of signal at the end
n_lim = int(np.ceil(len(input_mic) - t_cut*Fs))
input_clean = signal1[:n_lim]
input_mic = input_mic[:n_lim]
out_DirectMVDR = out_DirectMVDR[:n_lim]
out_RakeMVDR = out_RakeMVDR[:n_lim]
out_DirectPerceptual = out_DirectPerceptual[:n_lim]
out_RakePerceptual = out_RakePerceptual[:n_lim]
github LCAV / FRIDA / figure_doa_9_mics_10_src.py View on Github
        # highpass filter at 150 Hz
        for s in speech_signals.T:
            s[:] = pra.highpass(s, fs, fc=150.)
        for s in silence.T:
            s[:] = pra.highpass(s, fs, fc=150.)

    # Normalize the amplitude
    n_factor = 0.95 / np.max(np.abs(speech_signals))
    speech_signals *= n_factor
    silence *= n_factor

    # estimate noise floor
    y_noise_stft = []
    for k in range(num_mic):
        y_stft = pra.stft(silence[:, k], fft_size, frame_shift_step,
                          transform=rfft, win=win_stft).T / np.sqrt(fft_size)
        y_noise_stft.append(y_stft)
    y_noise_stft = np.array(y_noise_stft)
    noise_floor = np.mean(np.abs(y_noise_stft) ** 2)

    # estimate SNR in dB (on 1st microphone)
    noise_var = np.mean(np.abs(silence) ** 2)
    sig_var = np.mean(np.abs(speech_signals) ** 2)
    # rough estimate of SNR
    SNR = 10 * np.log10((sig_var - noise_var) / noise_var)
    print('Estimated SNR: ' + str(SNR))
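
The estimate treats the recording as clean speech plus independent noise, so the noise variance is subtracted from the measured variance before forming the ratio. A quick sanity check with invented values:

import numpy as np

noise_var = 0.01
sig_var = 0.11  # speech-plus-noise variance
SNR = 10 * np.log10((sig_var - noise_var) / noise_var)  # = 10.0 dB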

github LCAV / FRIDA / figure_doa_experiment.py View on Github
    for s in speech_signals.T:
        s[:] = pra.highpass(s, pmt['fs'], 100.)

    if pmt['stft_win']:
        stft_win = np.hanning(pmt['nfft'])
    else:
        stft_win = None

    # Normalize the amplitude
    speech_signals *= pmt['scaling']

    # Compute STFT of signal
    # -------------------------
    y_mic_stft = []
    for k in range(speech_signals.shape[1]):
        y_stft = pra.stft(speech_signals[:, k], pmt['nfft'], pmt['stft_hop'],
                          transform=rfft, win=stft_win).T / np.sqrt(pmt['nfft'])
        y_mic_stft.append(y_stft)
    y_mic_stft = np.array(y_mic_stft)

    # estimate SNR in dB (on 1st microphone)
    sig_var = np.var(speech_signals)
    SNR = 10 * np.log10((sig_var - pmt['noise_var']) / pmt['noise_var'])

    freq_bins = copy.copy(pmt['freq_bins'][K-1])

    # dict for output
    phi_recon = {}

    for alg in algo_names:

        # Use the convenient dictionary of algorithms defined
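
The listing is truncated inside the loop; the 'convenient dictionary' it refers to maps algorithm names to their classes. A hypothetical reconstruction of that pattern (the dictionary contents and constructor arguments are illustrative, not FRIDA's actual table):

    doa_algorithms = {'SRP-PHAT': pra.doa.SRP, 'MUSIC': pra.doa.MUSIC}

    for alg in algo_names:
        d = doa_algorithms[alg](L=mic_array, fs=pmt['fs'], nfft=pmt['nfft'],
                                num_src=K)
        d.locate_sources(y_mic_stft, freq_bins=freq_bins)
        phi_recon[alg] = d.azimuth_recon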