How to use the sidekit.sv_utils.serialize function in SIDEKIT

To help you get started, we've selected a few SIDEKIT examples based on popular ways it is used in public projects.

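Before diving into the excerpts, it helps to know what serialize is for. In SIDEKIT's sv_utils, serialize(M) re-allocates a numpy array on a multiprocessing shared-memory buffer, so that forked worker processes write into the same storage instead of into pickled copies. The sketch below illustrates that effect; it is not SIDEKIT code, the names acc and fill_row are made up, and it assumes a fork start method (the Linux default), since under spawn each child would rebuild its own buffer.

import multiprocessing
import numpy
from sidekit.sv_utils import serialize

# serialize() returns an array backed by shared memory; writes made by
# forked children are visible to the parent (fork start method assumed).
acc = serialize(numpy.zeros((4, 8), dtype=numpy.float32))

def fill_row(i):
    acc[i] = i          # an in-place write into the shared buffer

if __name__ == "__main__":
    jobs = [multiprocessing.Process(target=fill_row, args=(i,)) for i in range(4)]
    for p in jobs:
        p.start()
    for p in jobs:
        p.join()
    print(acc.sum())    # 48.0: every row was filled by a child process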

github: Anwarvic/Speaker-Recognition, sidekit/factor_analyser.py (total variability training: E-step with serialized accumulators)

        self.F = tv_init
        self.Sigma = numpy.zeros(ubm.get_mean_super_vector().shape, dtype=STAT_TYPE)

        # Save init if required
        if output_file_name is None:
            output_file_name = "temporary_factor_analyser"
        if save_init:
            self.write(output_file_name + "_init.h5")

        # Estimate TV iteratively
        for it in range(nb_iter):

            # Create serialized (shared-memory) accumulators for the list of
            # models to process, so batch workers can update them in place
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', RuntimeWarning)
                _A = serialize(numpy.zeros((distrib_nb, tv_rank * (tv_rank + 1) // 2), dtype=STAT_TYPE))
                _C = serialize(numpy.zeros((tv_rank, sv_size), dtype=STAT_TYPE))
                _R = serialize(numpy.zeros((tv_rank * (tv_rank + 1) // 2), dtype=STAT_TYPE))

            total_session_nb = 0

            # E-step
            # Accumulate statistics for each StatServer from the list
            for stat_server_file in stat_server_filename:

                # get info from the current StatServer
                with h5py.File(stat_server_file, 'r') as fh:
                    nb_sessions = fh["modelset"].shape[0]
                    total_session_nb += nb_sessions
                    # number of batches: ceiling of nb_sessions / batch_size
                    batch_nb = int(numpy.floor(nb_sessions / float(batch_size) + 0.999))
                    batch_indices = numpy.array_split(numpy.arange(nb_sessions), batch_nb)

                    manager = multiprocessing.Manager()
                    q = manager.Queue()
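In the excerpt above, the accumulators _A, _C and _R are wrapped with serialize before the E-step precisely so that the batch workers launched afterwards can add their sufficient statistics straight into shared memory instead of shipping large arrays back to the parent. A stripped-down version of that accumulation pattern might look like the following; the accumulate worker and its toy statistic are hypothetical, not the SIDEKIT E-step, and a fork start method is again assumed.

import multiprocessing
import numpy
from sidekit.sv_utils import serialize

# One shared accumulator, updated in place by every pool worker.
_A = serialize(numpy.zeros((16, 16), dtype=numpy.float32))
lock = multiprocessing.Lock()

def accumulate(batch):
    partial = numpy.outer(batch, batch).astype(numpy.float32)  # toy statistic
    with lock:              # shared memory is not synchronized on its own
        _A[:] += partial    # in-place write into the shared buffer

if __name__ == "__main__":
    batches = [numpy.random.rand(16) for _ in range(8)]
    with multiprocessing.Pool(4) as pool:
        pool.map(accumulate, batches)
    print(_A.trace())       # nonzero: the parent sees the workers' updates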
github: Anwarvic/Speaker-Recognition, sidekit/factor_analyser.py (i-vector extraction: serialized output buffers and a queue listener)

            nb_sessions = fh[prefix + "modelset"].shape[0]

            iv_server = StatServer()
            # note: Dataset.value was removed in h5py 3.0; ds[()] reads the same data
            iv_server.modelset = fh[prefix + 'modelset'][()]
            iv_server.segset = fh[prefix + 'segset'][()]

            tmpstart = fh[prefix + "start"][()]
            tmpstop = fh[prefix + "stop"][()]
            iv_server.start = numpy.empty(fh[prefix + "start"].shape, '|O')
            iv_server.stop = numpy.empty(fh[prefix + "stop"].shape, '|O')
            iv_server.start[tmpstart != -1] = tmpstart[tmpstart != -1]
            iv_server.stop[tmpstop != -1] = tmpstop[tmpstop != -1]

            iv_server.stat0 = numpy.ones((nb_sessions, 1), dtype=STAT_TYPE)
            with warnings.catch_warnings():
                # shared-memory output buffers filled by the worker processes
                iv_server.stat1 = serialize(numpy.zeros((nb_sessions, tv_rank)))
                iv_sigma = serialize(numpy.zeros((nb_sessions, tv_rank)))

            nb_sessions = iv_server.modelset.shape[0]
            # number of batches: ceiling of nb_sessions / batch_size
            batch_nb = int(numpy.floor(nb_sessions / float(batch_size) + 0.999))
            batch_indices = numpy.array_split(numpy.arange(nb_sessions), batch_nb)

            manager = multiprocessing.Manager()
            q = manager.Queue()
            # extra pool slots leave room for the listener submitted below
            pool = multiprocessing.Pool(num_thread + 2)

            # put listener to work first
            watcher = pool.apply_async(iv_collect, ((iv_server.stat1, iv_sigma), q))
            # fire off workers
            jobs = []

            # Load data per batch to reduce the memory footprint
            for batch_idx in batch_indices:
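This second excerpt adds the companion pattern: a manager.Queue plus a listener submitted with apply_async before the workers (the extra slots in Pool(num_thread + 2) leave room for it), so that per-batch results are funneled to a single collector. A simplified sketch of that listener loop follows; collector and worker are illustrative stand-ins, not SIDEKIT's iv_collect signature, and since apply_async pickles its arguments, this sketch hands the filled arrays back through the async result rather than relying on shared memory.

import multiprocessing
import numpy

def collector(arrays, q):
    # Drain the queue, writing each (start, iv, sigma) batch result into the
    # arrays, then hand the filled arrays back through the async result.
    iv, sigma = arrays
    while True:
        item = q.get()
        if item is None:                  # sentinel: all workers are done
            return iv, sigma
        start, batch_iv, batch_sigma = item
        iv[start:start + batch_iv.shape[0]] = batch_iv
        sigma[start:start + batch_sigma.shape[0]] = batch_sigma

def worker(args):
    start, size, rank, q = args
    # A real worker would estimate i-vectors for its batch here.
    q.put((start, numpy.full((size, rank), 1.0), numpy.full((size, rank), 2.0)))

if __name__ == "__main__":
    rank, nb_sessions, batch = 5, 20, 5
    iv = numpy.zeros((nb_sessions, rank))
    sigma = numpy.zeros((nb_sessions, rank))
    manager = multiprocessing.Manager()
    q = manager.Queue()                   # a Manager queue can be passed to pool workers
    with multiprocessing.Pool(3) as pool:
        watcher = pool.apply_async(collector, ((iv, sigma), q))  # listener first
        pool.map(worker, [(s, batch, rank, q) for s in range(0, nb_sessions, batch)])
        q.put(None)                       # stop the listener
        iv, sigma = watcher.get()         # retrieve the filled arrays
    print(iv.sum(), sigma.sum())          # 100.0 200.0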