Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def train_ubm(self, feature_dir, speaker_list, ubm_list, distrib_nb, feature_extension='h5', num_threads=10):
    """Train the UBM (a GMM) with the EM algorithm and write it to disk.

    A sidekit ``FeaturesServer`` is set up to read feature files following the
    pattern ``<feature_dir>/<speaker_list>/feat/{}.<feature_extension>``. A new
    ``sidekit.Mixture`` is then trained by calling ``EM_split`` and saved as
    ``ubm_<self.network_file>.h5`` below the path returned by
    ``get_experiment_nets()``.

    Args:
        feature_dir: root directory used to build the feature file pattern.
        speaker_list: name inserted as the sub-directory of ``feature_dir``.
        ubm_list: forwarded to ``EM_split`` as its second argument
            (presumably the list of files to train on -- confirm against sidekit).
        distrib_nb: forwarded to ``EM_split`` as its third argument
            (presumably the number of Gaussian distributions -- confirm).
        feature_extension: file extension of the feature files.
        num_threads: passed to ``EM_split`` as ``num_thread``.

    Returns:
        None. The trained mixture is only persisted to disk.
    """
    self.logger.info('training UBM')

    # "{{}}" comes out of str.format as a literal "{}" placeholder that stays in
    # the pattern (presumably filled in per file by sidekit).
    filename_pattern = "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
        dir=feature_dir,
        speaker_list=speaker_list,
        ext=feature_extension,
    )
    server_options = dict(
        feature_filename_structure=filename_pattern,
        dataset_list=["energy", "cep", "vad"],
        mask="[0-12]",
        feat_norm="cmvn",
        keep_all_features=True,
        delta=True,
        double_delta=True,
        rasta=True,
        context=None,
    )
    features_server = sidekit.FeaturesServer(**server_options)

    mixture = sidekit.Mixture()
    # The value returned by EM_split is bound but never used afterwards.
    _llk = mixture.EM_split(features_server, ubm_list, distrib_nb, num_thread=num_threads)

    # Build the output path only after training, as the original did.
    model_path = get_experiment_nets() + '/ubm_{}.h5'.format(self.network_file)
    mixture.write(model_path)
feature_extension = 'h5'
set_of_embeddings = []
set_of_speakers = []
set_of_num_embeddings = []
set_of_times=[]
checkpoints=["/TV_{}".format(self.network_file)]
#load data:
ubm = sidekit.Mixture()
ubm.read(get_experiment_nets()+'/ubm_{}.h5'.format(self.network_file))
ubm_list, test_list_long = self.load_data(speaker_list,os.path.splitext(os.path.split(self.get_validation_train_data())[1])[0])
ubm_list, test_list_short = self.load_data(speaker_list,os.path.splitext(os.path.split(self.get_validation_test_data())[1])[0])
tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(get_experiment_nets()+"/TV_{}".format(self.network_file))
fs = sidekit.FeaturesServer(feature_filename_structure=(
"{dir}/{speaker_list}/feat/{{}}.{ext}".format(dir=get_training('i_vector'), speaker_list=speaker_list,
ext=feature_extension)),
dataset_list=["energy", "cep", "vad"],
mask="[0-12]",
feat_norm="cmvn",
keep_all_features=True,
delta=True,
double_delta=True,
rasta=True,
context=None)
#exract ivectors
test_stat_long = sidekit.StatServer(test_list_long, ubm=ubm, distrib_nb=distrib_nb, feature_size=0, index=None)
test_stat_long.accumulate_stat(ubm=ubm, feature_server=fs, seg_indices=range(test_stat_long.segset.shape[0]),
num_thread=nbThread)
Returns:
server: which is the FeatureServer object
"""
if group:
feat_dir = os.path.join(self.BASE_DIR, "feat", group)
else:
feat_dir = os.path.join(self.BASE_DIR, "feat")
# feature_filename_structure: structure of the filename to use to load HDF5 files
# dataset_list: string of the form ["cep", "fb", vad", energy", "bnf"]
# feat_norm: type of normalization to apply as post-processing
# delta: if True, append the first order derivative
# double_delta: if True, append the second order derivative
# rasta: if True, perform RASTA filtering
# keep_all_features: boolean, if True, keep all features; if False,
# keep frames according to the vad labels
server = sidekit.FeaturesServer(
feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
dataset_list=self.conf['features'],
feat_norm="cmvn", #cepstral mean-variance normalization
delta=True,
double_delta=True,
rasta=True,
keep_all_features=True)
logging.info("Feature-Server is created")
logging.debug(server)
return server