How to use SIDEKIT - common examples

To help you get started, we've selected a few SIDEKIT examples drawn from popular ways the library is used in public projects.


github Anwarvic / Speaker-Recognition / sidekit / factor_analyser.py (view on GitHub)
        self.Sigma = numpy.zeros(ubm.get_mean_super_vector().shape, dtype=STAT_TYPE)

        # Save init if required
        if output_file_name is None:
            output_file_name = "temporary_factor_analyser"
        if save_init:
            self.write(output_file_name + "_init.h5")

        # Estimate TV iteratively
        for it in range(nb_iter):

            # Create serialized accumulators for the list of models to process
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', RuntimeWarning)
                _A = serialize(numpy.zeros((distrib_nb, tv_rank * (tv_rank + 1) // 2), dtype=STAT_TYPE))
                _C = serialize(numpy.zeros((tv_rank, sv_size), dtype=STAT_TYPE))
                _R = serialize(numpy.zeros((tv_rank * (tv_rank + 1) // 2), dtype=STAT_TYPE))

            total_session_nb = 0

            # E-step
            # Accumulate statistics for each StatServer from the list
            for stat_server_file in stat_server_filename:

                # get info from the current StatServer
                with h5py.File(stat_server_file, 'r') as fh:
                    nb_sessions = fh["modelset"].shape[0]
                    total_session_nb += nb_sessions
                    batch_nb = int(numpy.floor(nb_sessions / float(batch_size) + 0.999))  # effectively ceil(nb_sessions / batch_size)
                    batch_indices = numpy.array_split(numpy.arange(nb_sessions), batch_nb)

                    manager = multiprocessing.Manager()
                    q = manager.Queue()
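For orientation, here is a minimal sketch of how this training loop is typically driven. The parameter names mirror the variables in the excerpt (stat_server_filename, ubm, tv_rank, nb_iter, batch_size, save_init, output_file_name), but treat the exact signature of total_variability as an assumption to verify against your SIDEKIT version; the file names are hypothetical.

import sidekit

# A hedged sketch, not the canonical API: verify total_variability's
# signature against your SIDEKIT version. File names are hypothetical.
ubm = sidekit.Mixture()
ubm.read("ubm_512.h5")                         # a previously trained UBM

fa = sidekit.FactorAnalyser()
fa.total_variability(
    stat_server_filename=["stats_enroll.h5"],  # list of StatServer files
    ubm=ubm,
    tv_rank=400,           # rank of the total-variability matrix
    nb_iter=10,            # number of EM iterations (the loop above)
    batch_size=300,        # sessions per batch in the E-step
    save_init=False,
    output_file_name="TV_matrix",
    num_thread=4)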
github Anwarvic / Speaker-Recognition / sidekit / factor_analyser.py (view on GitHub)
        assert (isinstance(ubm, Mixture) and ubm.validate()), "Second argument must be a proper Mixture"
        assert (isinstance(nb_iter, int) and (0 < nb_iter)), "nb_iter must be a positive integer"

        gmm_covariance = "diag" if ubm.invcov.ndim == 2 else "full"

        # Set useful variables
        with h5py.File(stat_server_filename[0], 'r') as fh:  # open the first StatServer to get size
            _, sv_size = fh['stat1'].shape
            feature_size = fh['stat1'].shape[1] // fh['stat0'].shape[1]
            distrib_nb = fh['stat0'].shape[1]

        upper_triangle_indices = numpy.triu_indices(tv_rank)

        # mean and Sigma are initialized to zero because the statistics are centered
        self.mean = numpy.zeros(ubm.get_mean_super_vector().shape, dtype=STAT_TYPE)
        self.F = serialize(numpy.zeros((sv_size, tv_rank)).astype(STAT_TYPE))
        if tv_init is None:
            self.F = numpy.random.randn(sv_size, tv_rank).astype(STAT_TYPE)
        else:
            self.F = tv_init
        self.Sigma = numpy.zeros(ubm.get_mean_super_vector().shape, dtype=STAT_TYPE)

        # Save init if required
        if output_file_name is None:
            output_file_name = "temporary_factor_analyser"
        if save_init:
            self.write(output_file_name + "_init.h5")

        # Estimate TV iteratively
        for it in range(nb_iter):

            # Create serialized accumulators for the list of models to process
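These serialized accumulators (shown in the first excerpt as _A, _C and _R) store only the upper triangle of symmetric tv_rank x tv_rank matrices, which is why they are allocated with tv_rank * (tv_rank + 1) // 2 entries; upper_triangle_indices above is the packing index. A small self-contained sketch of that round trip:

import numpy

tv_rank = 3
upper_triangle_indices = numpy.triu_indices(tv_rank)

# A symmetric tv_rank x tv_rank matrix has tv_rank * (tv_rank + 1) // 2
# unique entries, so only the upper triangle needs to be stored.
m = numpy.arange(tv_rank * tv_rank, dtype=float).reshape(tv_rank, tv_rank)
m = (m + m.T) / 2.0                  # make it symmetric
packed = m[upper_triangle_indices]   # shape: (6,) for tv_rank = 3

# Unpack back into the full symmetric matrix.
full = numpy.zeros((tv_rank, tv_rank))
full[upper_triangle_indices] = packed
full += full.T - numpy.diag(full.diagonal())
assert numpy.allclose(full, m)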
github Anwarvic / Speaker-Recognition / ubm.py (view on GitHub)
        - Create a FeatureServer for the enroll features
        - use the EM algorithm to train our UBM on the enroll features
        - create a StatServer to save the trained parameters
        - if the SAVE argument is True (which it is by default), save that
          StatServer.
        Args:
            SAVE (boolean): if True, save the StatServer; if False,
                discard it.
        """
        #SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
        train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
        for i in range(len(train_list)):
            train_list[i] = train_list[i].split(".h5")[0]
        server = self.createFeatureServer("enroll")
        logging.info("Training...")
        ubm = sidekit.Mixture()
        # Set the model name
        ubm.name = "ubm_{}.h5".format(self.NUM_GAUSSIANS) 
        # Expectation-Maximization estimation of the Mixture parameters.
        ubm.EM_split(
            features_server=server, #sidekit.FeaturesServer used to load data
            feature_list=train_list, #list of feature files to train the model
            distrib_nb=self.NUM_GAUSSIANS, #number of Gaussian distributions
            num_thread=self.NUM_THREADS, # number of parallel processes
            save_partial=False, # if False, it only saves the last model
            iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8)
            )
            # -> 1 iteration  of EM with 2  distributions
            # -> 2 iterations of EM with 4  distributions
            # -> 2 iterations of EM with 8  distributions
            # -> 4 iterations of EM with 16 distributions
            # -> 4 iterations of EM with 32 distributions
            # ... doubling at each split until NUM_GAUSSIANS is reached
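Each entry of the iterations tuple is the number of EM passes run after one binary split, so the k-th entry applies at 2**k distributions. A quick sanity check of that schedule in plain Python (a sketch of the bookkeeping, not SIDEKIT API):

# Each entry in `iterations` is the number of EM passes after one binary
# split, so entry k (1-based) applies at 2**k distributions.
iterations = (1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8)
for split, n_iter in enumerate(iterations, start=1):
    print("{} EM iteration(s) at {} distributions".format(n_iter, 2 ** split))
# Splitting stops once 2 ** split reaches distrib_nb (NUM_GAUSSIANS here).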
github stdm / ZHAW_deep_voice / networks / i_vector / ivec_controller.py (view on GitHub)
        finally, testing:
        '''
        speaker_list = self.get_validation_data_name()
        distrib_nb = self.config.getint('i_vector', 'distrib_nb')
        nbThread = self.config.getint('i_vector', 'nbThread')
        vector_size = self.config.getint('i_vector', 'vector_size')
        feature_extension = 'h5'

        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_times = []
        checkpoints = ["/TV_{}".format(self.network_file)]

        # Load data
        ubm = sidekit.Mixture()
        ubm.read(get_experiment_nets() + '/ubm_{}.h5'.format(self.network_file))
        ubm_list, test_list_long = self.load_data(
            speaker_list,
            os.path.splitext(os.path.split(self.get_validation_train_data())[1])[0])
        ubm_list, test_list_short = self.load_data(
            speaker_list,
            os.path.splitext(os.path.split(self.get_validation_test_data())[1])[0])
        tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
            get_experiment_nets() + "/TV_{}".format(self.network_file))

        fs = sidekit.FeaturesServer(feature_filename_structure=(
                "{dir}/{speaker_list}/feat/{{}}.{ext}".format(dir=get_training('i_vector'), speaker_list=speaker_list,
                                                              ext=feature_extension)),
            dataset_list=["energy", "cep", "vad"],
            mask="[0-12]",
            feat_norm="cmvn",
            keep_all_features=True,
            delta=True,
            double_delta=True,
            rasta=True,
            context=None)
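With the UBM, the TV matrix, and a FeaturesServer loaded, the usual next step is to accumulate zero- and first-order statistics for the test segments and extract i-vectors. A hedged sketch of that step follows; test_idmap is a hypothetical sidekit.IdMap for the test segments, and the method names (accumulate_stat, extract_ivectors_single) should be verified against your SIDEKIT version:

# Hedged sketch; assumes ubm, tv, tv_mean, tv_sigma, fs and nbThread from
# the excerpt above, plus a hypothetical IdMap `test_idmap`.
fa = sidekit.FactorAnalyser(mean=tv_mean, F=tv, Sigma=tv_sigma)

test_stat = sidekit.StatServer(test_idmap, ubm=ubm)
test_stat.accumulate_stat(ubm=ubm,
                          feature_server=fs,
                          seg_indices=range(test_stat.segset.shape[0]),
                          num_thread=nbThread)

# One i-vector per session (single-process variant).
iv = fa.extract_ivectors_single(ubm=ubm, stat_server=test_stat)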
github Anwarvic / Speaker-Recognition / data_init.py (view on GitHub)
        Args:
            group (string): name of the group to create an idmap for.
        NOTE: Duplicated entries are allowed in each list.
        """
        assert group in ["enroll", "test"],\
            "Invalid group name! Choose either 'enroll' or 'test'"
        # Make enrollment (IdMap) file list
        group_dir = os.path.join(self.audio_dir, group)
        group_files = sorted(os.listdir(group_dir))
        # list of model IDs
        group_models = [files.split('.')[0] for files in group_files]
        # list of audio segments IDs
        group_segments = [group+"/"+f for f in group_files]
        
        # Generate IdMap
        group_idmap = sidekit.IdMap()
        group_idmap.leftids = np.asarray(group_models)
        group_idmap.rightids = np.asarray(group_segments)
        # start/stop are object arrays left as None, meaning each file
        # is used from start to end
        group_idmap.start = np.empty(group_idmap.rightids.shape, '|O')
        group_idmap.stop = np.empty(group_idmap.rightids.shape, '|O')
        if group_idmap.validate():
            group_idmap.write(os.path.join(self.task_dir, group+'_idmap.h5'))
            #generate tv_idmap and plda_idmap as well
            if group == "enroll":
                group_idmap.write(os.path.join(self.task_dir, 'tv_idmap.h5'))
                group_idmap.write(os.path.join(self.task_dir, 'plda_idmap.h5'))
        else:
            raise RuntimeError('Problems with creating idMap file')
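To make the IdMap layout concrete, here is a minimal sketch with two hypothetical enrollment files: leftids carry the model (speaker) identifiers, rightids the matching audio segments, and empty object arrays for start/stop mean each file is used end to end.

import numpy as np
import sidekit

idmap = sidekit.IdMap()
idmap.leftids = np.asarray(["spk01", "spk02"])   # model (speaker) IDs
idmap.rightids = np.asarray(["enroll/spk01.wav", "enroll/spk02.wav"])
idmap.start = np.empty(2, dtype='|O')   # object arrays filled with None:
idmap.stop = np.empty(2, dtype='|O')    # no start/stop -> whole file
assert idmap.validate()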
github stdm / ZHAW_deep_voice / networks / i_vector / ivec_controller.py (view on GitHub)
    def train_ubm(self, feature_dir, speaker_list, ubm_list, distrib_nb, feature_extension='h5', num_threads=10):
        '''
        Train the UBM (a GMM) with the EM algorithm.
        '''

        self.logger.info('training UBM')

        fs = sidekit.FeaturesServer(
            feature_filename_structure=(
                "{dir}/{speaker_list}/feat/{{}}.{ext}".format(dir=feature_dir, speaker_list=speaker_list,
                                                              ext=feature_extension)),
            dataset_list=["energy", "cep", "vad"],
            mask="[0-12]",
            feat_norm="cmvn",
            keep_all_features=True,
            delta=True,
            double_delta=True,
            rasta=True,
            context=None)

        ubm = sidekit.Mixture()
        llk = ubm.EM_split(fs, ubm_list, distrib_nb, num_thread=num_threads)  # llk: log-likelihoods from EM
        ubm.write(get_experiment_nets() + '/ubm_{}.h5'.format(self.network_file))

        return ubm, fs
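Finally, a hedged usage sketch of the function above; controller, the paths, and the parameter values are illustrative assumptions, not project defaults.

# Hypothetical call mirroring the signature above; `controller` is an
# instance of the class this method belongs to, and `ubm_file_list` is
# an assumed list of feature-file basenames for EM_split.
ubm, fs = controller.train_ubm(
    feature_dir="/data/features",   # root containing <speaker_list>/feat/
    speaker_list="train_speakers",
    ubm_list=ubm_file_list,
    distrib_nb=512,                 # target number of Gaussians
    feature_extension="h5",
    num_threads=10)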