Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
feature = []
label = []
new_x = []
new_y = []
for i in range(len(x)):
for j in range(x[i].shape[0]//self.sample_rate):
new_x.append(x[i][j*self.sample_rate:(j+1)*self.sample_rate])
new_y.append(y[i])
x = new_x
y = new_y
for i in tqdm(range(len(x))):
# 这里MFCC和PLP默认是16000Hz,注意修改
# mfcc 25ms窗长,10ms重叠
if feature_type == 'MFCC':
_feature = mfcc(x[i], fs=self.sample_rate)[0]
elif feature_type == 'PLP':
_feature = plp(x[i], fs=self.sample_rate)[0]
else:
raise NameError
# 特征出了问题,存在一些无穷大,导致整个网络的梯度爆炸了,需要特殊处理才行
if np.isnan(_feature).sum()>0:
continue
# _feature = np.concatenate([_feature,self.delta(_feature)],axis=1)
# _feature = preprocessing.scale(_feature)
# _feature = preprocessing.StandardScaler().fit_transform(_feature)
# 每2*num为一个输入,并且重叠num
feature.append(_feature)
label.append(y[i])
print(len(feature), feature[0].shape)
self.save(feature, '{}_{}_feature'.format(datatype, feature_type))
extract feature from x
:param x: type list, each element is audio
:param y: type list, each element is label of audio in x
:param filepath: the path to save feature
:param is_train: if true, generate train_data (type dict, key is label, value is feature),
if false, just extract feature from x
:return:
"""
start_time = get_time()
print("Extract {} feature...".format(feature_type))
feature = []
train_data = {}
for i in tqdm(range(len(x))):
# extract mfcc feature based on psf, you can look more detail on psf's website.
if feature_type=='MFCC':
_feature = mfcc(x[i])
mfcc_delta = delta(_feature)
_feature = np.hstack((_feature, mfcc_delta))
_feature = preprocessing.scale(_feature)
elif feature_type=='PLP':
_feature = plp(x[i])
mfcc_delta = delta(_feature)
_feature = np.hstack((_feature, mfcc_delta))
_feature = preprocessing.scale(_feature)
else:
raise NameError
# append _feature to feature
feature.append(_feature)
:return:
"""
start_time = get_time()
if not os.path.exists('feature'):
os.mkdir('feature')
if not os.path.exists('feature/{}_feature.pkl'.format(feature_type)):
x, y = self.load_data()
print("Extract {} feature...".format(feature_type))
feature = []
label = []
for i in tqdm(range(len(x))):
# 这里MFCC和PLP默认是16000Hz,注意修改
# mfcc 25ms窗长,10ms重叠
if feature_type == 'MFCC':
_feature = mfcc(x[i])[0]
elif feature_type == 'PLP':
_feature = plp(x[i])[0]
else:
raise NameError
_feature = np.concatenate([_feature,self.delta(_feature)],axis=1)
# TODO 兼容i-vector 和 d-vector
_feature = preprocessing.scale(_feature)
num = 10
for j in range(_feature.shape[0]//num-1):
feature.append(_feature[j*num:j*num+2*num])
label.append(y[i])
print(len(feature), feature[0].shape)
self.save(feature, '{}_feature'.format(feature_type))
self.save(label, '{}_label'.format(feature_type))
def _wav2feats(wavname):
"""
"""
ext = os.path.splitext(wavname)[-1]
assert ext.lower() == '.wav' or ext.lower() == '.wave'
sig, read_framerate, sampwidth = read_wav(wavname)
shp = sig.shape
# wav should contain a single channel
assert len(shp) == 1 or (len(shp) == 2 and shp[1] == 1)
# wav sample rate should be 16000 Hz
assert read_framerate == 16000
assert sampwidth == 2
sig *= (2**(15-sampwidth))
_, loge, _, mspec = mfcc(sig.astype(np.float32), get_mspec=True)
return mspec, loge
def record(self):
    """Record a short clip and store its scaled features on ``self.feature``.

    Logs progress to ``self.textBrowser``, calls the module-level
    ``record(seconds=3)``, reads ``test.wav``, extracts MFCC features when
    ``self.feature_type`` is ``'MFCC'`` (PLP otherwise), appends their
    deltas column-wise, standardises the result with
    ``preprocessing.scale`` and finally deletes ``test.wav``.

    NOTE(review): presumably the module-level ``record`` writes
    ``test.wav`` -- confirm against its implementation.
    """
    self.textBrowser.append('Start the recording !')
    record(seconds=3)
    self.textBrowser.append('3 seconds record has completed.')

    _, audio = read(filename='test.wav')

    # Pick the extractor once; both return a sequence whose first item
    # is the feature matrix used below.
    extractor = mfcc if self.feature_type == 'MFCC' else plp
    base_feature = extractor(audio)[0]

    # Stack the base features with their deltas, then standardise.
    with_delta = np.hstack((base_feature, delta(base_feature)))
    self.feature = preprocessing.scale(with_delta)

    os.remove('test.wav')