Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:param prefix: prefix used to store the StatServer in its file
:param batch_size: number of sessions to process in a batch
:param uncertainty: a boolean, if True, return the diagonal of the uncertainty matrices
:param num_thread: number of processes to run in parallel
:return: a StatServer with i-vectors in the stat1 attribute and a matrix of uncertainty matrices (optional)
"""
assert (isinstance(ubm, Mixture) and ubm.validate()), "Second argument must be a proper Mixture"
tv_rank = self.F.shape[1]
# Set useful variables
with h5py.File(stat_server_filename, 'r') as fh: # open the first statserver to get size
_, sv_size = fh[prefix + 'stat1'].shape
nb_sessions = fh[prefix + "modelset"].shape[0]
iv_server = StatServer()
iv_server.modelset = fh.get(prefix + 'modelset').value
iv_server.segset = fh.get(prefix + 'segset').value
tmpstart = fh.get(prefix+"start").value
tmpstop = fh.get(prefix+"stop").value
iv_server.start = numpy.empty(fh[prefix+"start"].shape, '|O')
iv_server.stop = numpy.empty(fh[prefix+"stop"].shape, '|O')
iv_server.start[tmpstart != -1] = tmpstart[tmpstart != -1]
iv_server.stop[tmpstop != -1] = tmpstop[tmpstop != -1]
iv_server.stat0 = numpy.ones((nb_sessions, 1), dtype=STAT_TYPE)
with warnings.catch_warnings():
iv_server.stat1 = serialize(numpy.zeros((nb_sessions, tv_rank)))
iv_sigma = serialize(numpy.zeros((nb_sessions, tv_rank)))
nb_sessions = iv_server.modelset.shape[0]
from the UBM.
:param ubm: a Mixture object used to compute the denominator of the
likelihood ratios
:param enroll: a StatServer object whose stat1 attribute contains
mean super-vectors of the GMMs to use to compute the numerator
of the likelihood ratios.
:param ndx: an Ndx object which defines the list of trials to compute
:param feature_server: a FeaturesServer object to load the features
:param num_thread: number of threads to launch in parallel
:return: a Score object.
"""
assert isinstance(ubm, Mixture), 'First parameter should be a Mixture'
assert isinstance(enroll, StatServer), 'Second parameter should be a StatServer'
assert isinstance(ndx, Ndx), 'Third parameter should be a Ndx'
assert isinstance(feature_server, FeaturesServer), 'Fourth parameter should be a FeatureServer'
# Remove missing models and test segments
if feature_server.features_extractor is None:
existing_test_seg, test_seg_idx = sidekit.sv_utils.check_file_list(ndx.segset,
feature_server.feature_filename_structure)
clean_ndx = ndx.filter(enroll.modelset, existing_test_seg, True)
elif feature_server.features_extractor.audio_filename_structure is not None:
existing_test_seg, test_seg_idx = \
sidekit.sv_utils.check_file_list(ndx.segset, feature_server.features_extractor.audio_filename_structure)
clean_ndx = ndx.filter(enroll.modelset, existing_test_seg, True)
else:
clean_ndx = ndx
s = numpy.zeros(clean_ndx.trialmask.shape)
gmm_covariance = "diag" if ubm.invcov.ndim == 2 else "full"
# Set useful variables
tv_rank = self.F.shape[1]
feature_size = ubm.mu.shape[1]
nb_distrib = ubm.w.shape[0]
# Whiten the statistics for diagonal or full models
if gmm_covariance == "diag":
stat_server.whiten_stat1(ubm.get_mean_super_vector(), 1. / ubm.get_invcov_super_vector())
elif gmm_covariance == "full":
stat_server.whiten_stat1(ubm.get_mean_super_vector(), ubm.invchol)
# Extract i-vectors
iv_stat_server = StatServer()
iv_stat_server.modelset = copy.deepcopy(stat_server.modelset)
iv_stat_server.segset = copy.deepcopy(stat_server.segset)
iv_stat_server.start = copy.deepcopy(stat_server.start)
iv_stat_server.stop = copy.deepcopy(stat_server.stop)
iv_stat_server.stat0 = numpy.ones((stat_server.modelset.shape[0], 1))
iv_stat_server.stat1 = numpy.ones((stat_server.modelset.shape[0], tv_rank))
iv_sigma = numpy.ones((stat_server.modelset.shape[0], tv_rank))
# Index map used to replicate each distribution's stat0 across its feature dimensions
index_map = numpy.repeat(numpy.arange(nb_distrib), feature_size)
for sess in tqdm(range(stat_server.segset.shape[0]), desc="Processing"):
inv_lambda = scipy.linalg.inv(numpy.eye(tv_rank) + (self.F.T *
stat_server.stat0[sess, index_map]).dot(self.F))
"""
Train a total variability model using a single process on a single node.
This method is provided for didactic purposes and should not be used as it uses
too much memory and is too slow. If you want to use a single process,
run: "total_variability_single"
:param stat_server: the StatServer containing data to train the model
:param ubm: a Mixture object
:param tv_rank: rank of the total variability model
:param nb_iter: number of EM iterations
:param min_div: boolean, if True, apply minimum divergence re-estimation
:param tv_init: initial matrix to start the EM iterations with
:param save_init: boolean, if True, save the initial matrix
:param output_file_name: name of the file where to save the matrix
"""
assert(isinstance(stat_server, StatServer) and stat_server.validate()), \
"First argument must be a proper StatServer"
assert(isinstance(ubm, Mixture) and ubm.validate()), "Second argument must be a proper Mixture"
assert(isinstance(tv_rank, int) and (0 < tv_rank <= min(stat_server.stat1.shape))), \
"tv_rank must be a positive integer less than the dimension of the statistics"
assert(isinstance(nb_iter, int) and (0 < nb_iter)), "nb_iter must be a positive integer"
gmm_covariance = "diag" if ubm.invcov.ndim == 2 else "full"
# Set useful variables
nb_sessions, sv_size = stat_server.stat1.shape
feature_size = ubm.mu.shape[1]
nb_distrib = ubm.w.shape[0]
# Whiten the statistics for diagonal or full models
if gmm_covariance == "diag":
stat_server.whiten_stat1(ubm.get_mean_super_vector(), 1. / ubm.get_invcov_super_vector())
:param ubm: a Mixture object used to compute the denominator
of the likelihood ratios
:param enroll: a StatServer object whose stat1 attribute contains mean
super-vectors of the GMMs to use to compute the numerator of the
likelihood ratios.
:param ndx: an Ndx object which defines the list of trials to compute
:param feature_server: sidekit.FeaturesServer used to load the acoustic parameters
:param score_mat: a ndarray of scores to fill
:param seg_idx: the list of unique test segments to process.
Those test segments should belong to the list of test segments
in the ndx object. By setting seg_idx=None, all test segments
from the ndx object will be processed
"""
assert isinstance(ubm, Mixture), 'First parameter should be a Mixture'
assert isinstance(enroll, StatServer), 'Second parameter should be a StatServer'
assert isinstance(ndx, Ndx), 'Third parameter should be a Ndx'
assert isinstance(feature_server, FeaturesServer), 'Fourth parameter should be a FeatureServer'
if seg_idx is None:
seg_idx = range(ndx.segset.shape[0])
for ts in seg_idx:
logging.info('Compute trials involving test segment %d/%d', ts + 1, ndx.segset.shape[0])
# Select the models to test with the current segment
models = ndx.modelset[ndx.trialmask[:, ts]]
ind_dict = dict((k, i) for i, k in enumerate(ndx.modelset))
inter = set(ind_dict.keys()).intersection(models)
idx_ndx = [ind_dict[x] for x in inter]
ind_dict = dict((k, i) for i, k in enumerate(enroll.modelset))
inter = set(ind_dict.keys()).intersection(models)