self.F = tv_init
self.Sigma = numpy.zeros(ubm.get_mean_super_vector().shape, dtype=STAT_TYPE)

# Save init if required
if output_file_name is None:
    output_file_name = "temporary_factor_analyser"
if save_init:
    self.write(output_file_name + "_init.h5")

# Estimate TV iteratively
for it in range(nb_iter):

    # Create serialized accumulators for the list of models to process
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        _A = serialize(numpy.zeros((distrib_nb, tv_rank * (tv_rank + 1) // 2), dtype=STAT_TYPE))
        _C = serialize(numpy.zeros((tv_rank, sv_size), dtype=STAT_TYPE))
        _R = serialize(numpy.zeros((tv_rank * (tv_rank + 1) // 2), dtype=STAT_TYPE))

    total_session_nb = 0

    # E-step
    # Accumulate statistics for each StatServer from the list
    for stat_server_file in stat_server_filename:

        # get info from the current StatServer
        with h5py.File(stat_server_file, 'r') as fh:
            nb_sessions = fh["modelset"].shape[0]
            total_session_nb += nb_sessions

            batch_nb = int(numpy.floor(nb_sessions / float(batch_size) + 0.999))
            batch_indices = numpy.array_split(numpy.arange(nb_sessions), batch_nb)

            manager = multiprocessing.Manager()
            q = manager.Queue()
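# --- Illustrative aside (not part of the source above) ---
# The batch count above, int(numpy.floor(nb_sessions / float(batch_size) + 0.999)),
# is essentially a ceiling division: any partially filled batch is rounded up.
# A minimal, self-contained sketch of the same batching logic, assuming only NumPy
# and using placeholder values for nb_sessions and batch_size:

import numpy

nb_sessions = 10   # placeholder: number of sessions in the StatServer
batch_size = 3     # placeholder: sessions processed per batch

batch_nb = -(-nb_sessions // batch_size)   # ceiling division: 10 / 3 -> 4 batches
batch_indices = numpy.array_split(numpy.arange(nb_sessions), batch_nb)

for idx in batch_indices:
    print(idx)     # [0 1 2], [3 4 5], [6 7], [8 9]
# --- End of aside ---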
# fh is the stat-server HDF5 file opened earlier (not shown in this excerpt)
nb_sessions = fh[prefix + "modelset"].shape[0]

iv_server = StatServer()
# note: Dataset.value is deprecated in h5py >= 2.9 (removed in 3.0); ds[()] is the modern equivalent
iv_server.modelset = fh.get(prefix + 'modelset').value
iv_server.segset = fh.get(prefix + 'segset').value

tmpstart = fh.get(prefix + "start").value
tmpstop = fh.get(prefix + "stop").value
iv_server.start = numpy.empty(fh[prefix + "start"].shape, '|O')
iv_server.stop = numpy.empty(fh[prefix + "stop"].shape, '|O')
# keep only valid boundaries; sessions marked -1 stay as None
iv_server.start[tmpstart != -1] = tmpstart[tmpstart != -1]
iv_server.stop[tmpstop != -1] = tmpstop[tmpstop != -1]

iv_server.stat0 = numpy.ones((nb_sessions, 1), dtype=STAT_TYPE)
with warnings.catch_warnings():
    iv_server.stat1 = serialize(numpy.zeros((nb_sessions, tv_rank)))
    iv_sigma = serialize(numpy.zeros((nb_sessions, tv_rank)))

nb_sessions = iv_server.modelset.shape[0]
batch_nb = int(numpy.floor(nb_sessions / float(batch_size) + 0.999))
batch_indices = numpy.array_split(numpy.arange(nb_sessions), batch_nb)

manager = multiprocessing.Manager()
q = manager.Queue()
pool = multiprocessing.Pool(num_thread + 2)

# put listener to work first
watcher = pool.apply_async(iv_collect, ((iv_server.stat1, iv_sigma), q))
# fire off workers
jobs = []

# Load data per batch to reduce the memory footprint
for batch_idx in batch_indices:
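# --- Illustrative aside (not part of the source above) ---
# The block above follows a listener/worker multiprocessing pattern: a Manager queue
# is shared between one collector task (iv_collect, started first with apply_async)
# and the per-batch workers appended to `jobs`, which push partial results onto the
# queue. A minimal, runnable sketch of that pattern with placeholder functions
# (`work` and `collect` are illustrative, not SIDEKIT APIs; the real collector
# writes into pre-allocated shared arrays instead of a dict):

import multiprocessing

def work(args):
    # Placeholder worker: compute a partial result and push it onto the queue
    idx, q = args
    q.put((idx, idx * idx))

def collect(q):
    # Placeholder listener: drain the queue until the 'DONE' sentinel arrives
    results = {}
    while True:
        item = q.get()
        if item == 'DONE':
            return results
        idx, value = item
        results[idx] = value

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    q = manager.Queue()
    pool = multiprocessing.Pool(4 + 2)   # workers plus room for the listener

    # put listener to work first
    watcher = pool.apply_async(collect, (q,))
    # fire off workers
    jobs = [pool.apply_async(work, ((i, q),)) for i in range(8)]
    for job in jobs:
        job.get()

    q.put('DONE')                        # tell the listener to stop
    print(watcher.get())                 # {0: 0, 1: 1, 2: 4, ...}

    pool.close()
    pool.join()
# --- End of aside ---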