How to use the sidekit.frontend.features.mfcc function in SIDEKIT

To help you get started, we’ve selected a few SIDEKIT examples based on popular ways it is used in public projects.

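Before diving into the examples, here is a minimal sketch of calling mfcc directly. The file name and the use of scipy.io.wavfile for loading are assumptions for illustration; as the snippets below confirm, mfcc accepts an fs keyword (16 kHz by default) and returns a list [cep, loge, spec, mspec], where spec and mspec are only filled in when get_spec or get_mspec is set.

import numpy as np
from scipy.io import wavfile
from sidekit.frontend.features import mfcc

# hypothetical input file; mfcc assumes 16 kHz audio by default
rate, sig = wavfile.read('speech.wav')

# the returned list is [cep, loge, spec, mspec]; spec/mspec stay None
# unless get_spec=True / get_mspec=True is passed
cep, loge, _, mspec = mfcc(sig.astype(np.float32), fs=rate, get_mspec=True)
print(cep.shape, loge.shape)  # cepstral coefficients and per-frame log-energy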

github kleinzcy / speech_signal_processing / d_vector.py
feature = []
label = []
new_x = []
new_y = []
# cut every signal into non-overlapping one-second chunks (sample_rate samples each)
for i in range(len(x)):
    for j in range(x[i].shape[0]//self.sample_rate):
        new_x.append(x[i][j*self.sample_rate:(j+1)*self.sample_rate])
        new_y.append(y[i])

x = new_x
y = new_y
for i in tqdm(range(len(x))):
    # MFCC and PLP default to 16000 Hz here; adjust fs if your data differs
    # mfcc uses a 25 ms window with a 10 ms frame shift
    if feature_type == 'MFCC':
        _feature = mfcc(x[i], fs=self.sample_rate)[0]
    elif feature_type == 'PLP':
        _feature = plp(x[i], fs=self.sample_rate)[0]
    else:
        raise ValueError('Unknown feature type: {}'.format(feature_type))
    # some features contain infinities/NaNs that made the gradients of the whole
    # network explode, so they need special handling: skip them here
    if np.isnan(_feature).sum() > 0:
        continue
    # _feature = np.concatenate([_feature, self.delta(_feature)], axis=1)
    # _feature = preprocessing.scale(_feature)
    # _feature = preprocessing.StandardScaler().fit_transform(_feature)
    # each input is 2*num frames long, overlapping the next by num frames
    feature.append(_feature)
    label.append(y[i])

print(len(feature), feature[0].shape)
self.save(feature, '{}_{}_feature'.format(datatype, feature_type))

github kleinzcy / speech_signal_processing / GMM_UBM.py
    Extract features from x
    :param x: type list, each element is an audio signal
    :param y: type list, each element is the label of the corresponding audio in x
    :param filepath: the path to save the feature
    :param is_train: if True, generate train_data (type dict, key is label, value is feature);
                     if False, just extract features from x
    :return:
    """
    start_time = get_time()
    print("Extract {} feature...".format(feature_type))
    feature = []
    train_data = {}
    for i in tqdm(range(len(x))):
        # extract the MFCC feature based on psf; see psf's website for more detail.
        if feature_type == 'MFCC':
            _feature = mfcc(x[i])
            mfcc_delta = delta(_feature)
            _feature = np.hstack((_feature, mfcc_delta))

            _feature = preprocessing.scale(_feature)
        elif feature_type == 'PLP':
            _feature = plp(x[i])
            plp_delta = delta(_feature)
            _feature = np.hstack((_feature, plp_delta))

            _feature = preprocessing.scale(_feature)
        else:
            raise ValueError('Unknown feature type: {}'.format(feature_type))

        # append _feature to feature
        feature.append(_feature)

github kleinzcy / speech_signal_processing / LSTM.py
        :return:
        """
        start_time = get_time()
        if not os.path.exists('feature'):
            os.mkdir('feature')

        if not os.path.exists('feature/{}_feature.pkl'.format(feature_type)):
            x, y = self.load_data()
            print("Extract {} feature...".format(feature_type))
            feature = []
            label = []
            for i in tqdm(range(len(x))):
                # MFCC and PLP default to 16000 Hz here; adjust fs if your data differs
                # mfcc uses a 25 ms window with a 10 ms frame shift
                if feature_type == 'MFCC':
                    _feature = mfcc(x[i])[0]
                elif feature_type == 'PLP':
                    _feature = plp(x[i])[0]
                else:
                    raise ValueError('Unknown feature type: {}'.format(feature_type))

                _feature = np.concatenate([_feature, self.delta(_feature)], axis=1)
                # TODO: make this compatible with i-vector and d-vector
                _feature = preprocessing.scale(_feature)
                # each training sample spans 2*num frames and overlaps the next by num frames
                num = 10
                for j in range(_feature.shape[0]//num - 1):
                    feature.append(_feature[j*num:j*num + 2*num])
                    label.append(y[i])
            print(len(feature), feature[0].shape)
            self.save(feature, '{}_feature'.format(feature_type))
            self.save(label, '{}_label'.format(feature_type))
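The windowing at the end of this loop is easy to get wrong, so here is a toy sketch of the same indexing on hypothetical dummy data: with num = 10, every sample spans 2*num = 20 frames and consecutive samples overlap by num frames.

import numpy as np

frames = np.arange(50).reshape(50, 1)  # stand-in for a (n_frames, n_dims) feature matrix
num = 10
chunks = [frames[j*num:j*num + 2*num] for j in range(frames.shape[0]//num - 1)]
print([(int(c[0, 0]), int(c[-1, 0])) for c in chunks])  # [(0, 19), (10, 29), (20, 39), (30, 49)]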

github ina-foss / inaSpeechSegmenter / inaSpeechSegmenter / segmenter.py
def _wav2feats(wavname):
    """
    Extract the mel spectrogram and per-frame log-energy from a
    16 kHz, 16-bit mono WAV file.
    """
    ext = os.path.splitext(wavname)[-1]
    assert ext.lower() == '.wav' or ext.lower() == '.wave'
    sig, read_framerate, sampwidth = read_wav(wavname)
    shp = sig.shape
    # wav should contain a single channel
    assert len(shp) == 1 or (len(shp) == 2 and shp[1] == 1)
    # wav sample rate should be 16000 Hz
    assert read_framerate == 16000
    assert sampwidth == 2
    sig *= (2**(15-sampwidth))
    # SIDEKIT's mfcc returns [cep, loge, spec, mspec]; request the mel spectrogram
    _, loge, _, mspec = mfcc(sig.astype(np.float32), get_mspec=True)
    return mspec, loge
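A quick usage sketch for the helper above (the file name is a hypothetical example; the WAV must be 16 kHz, 16-bit mono to pass the assertions):

mspec, loge = _wav2feats('speech.wav')
print(mspec.shape, loge.shape)  # mel-spectrogram frames and per-frame log-energy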

github kleinzcy / speech_signal_processing / UI / GMM_UBM_GUI.py
def record(self):
    self.textBrowser.append('Start the recording!')
    record(seconds=3)
    self.textBrowser.append('The 3-second recording has completed.')
    _, audio = read(filename='test.wav')
    # index 0 of the returned list holds the cepstral coefficients
    if self.feature_type == 'MFCC':
        feature = mfcc(audio)[0]
    else:
        feature = plp(audio)[0]

    _delta = delta(feature)
    feature = np.hstack((feature, _delta))

    feature = preprocessing.scale(feature)
    self.feature = feature
    os.remove('test.wav')