How to use the sidekit.Mixture function in SIDEKIT

To help you get started, we’ve selected a few SIDEKIT examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github stdm / ZHAW_deep_voice / networks / i_vector / ivec_controller.py View on Github external
self.logger.info('training UBM')

        fs = sidekit.FeaturesServer(
            feature_filename_structure=(
                "{dir}/{speaker_list}/feat/{{}}.{ext}".format(dir=feature_dir, speaker_list=speaker_list,
                                                              ext=feature_extension)),
            dataset_list=["energy", "cep", "vad"],
            mask="[0-12]",
            feat_norm="cmvn",
            keep_all_features=True,
            delta=True,
            double_delta=True,
            rasta=True,
            context=None)

        ubm = sidekit.Mixture()
        llk = ubm.EM_split(fs, ubm_list, distrib_nb, num_thread=num_threads)
        ubm.write(get_experiment_nets()+'/ubm_{}.h5'.format(self.network_file))

        return ubm, fs
github Anwarvic / Speaker-Recognition / ubm.py View on Github external
- Create FeatureServe for the enroll features
        - create use EM algorithm to train our UBM over the enroll features
        - create StatServer to save trained parameters
        - if Save arugment is True (which is by default), then it saves that
          StatServer.
        Args:
            SAVE (boolean): if True, then it will save the StatServer. If False,
               then the StatServer will be discarded.
        """
        #SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
        train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
        for i in range(len(train_list)):
            train_list[i] = train_list[i].split(".h5")[0]
        server = self.createFeatureServer("enroll")
        logging.info("Training...")
        ubm = sidekit.Mixture()
        # Set the model name
        ubm.name = "ubm_{}.h5".format(self.NUM_GAUSSIANS) 
        # Expectation-Maximization estimation of the Mixture parameters.
        ubm.EM_split(
            features_server=server, #sidekit.FeaturesServer used to load data
            feature_list=train_list, #list of feature files to train the model
            distrib_nb=self.NUM_GAUSSIANS, #number of Gaussian distributions
            num_thread=self.NUM_THREADS, # number of parallel processes
            save_partial=False, # if False, it only saves the last model
            iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8)
            )
            # -> 2 iterations of EM with 2    distributions
            # -> 2 iterations of EM with 4    distributions
            # -> 4 iterations of EM with 8    distributions
            # -> 4 iterations of EM with 16   distributions
            # -> 4 iterations of EM with 32   distributions
github stdm / ZHAW_deep_voice / networks / i_vector / ivec_controller.py View on Github external
finally, testing:
        '''
        speaker_list=self.get_validation_data_name()
        distrib_nb=self.config.getint('i_vector', 'distrib_nb')
        nbThread = self.config.getint('i_vector', 'nbThread')
        vector_size=self.config.getint('i_vector', 'vector_size')
        feature_extension = 'h5'

        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_times=[]
        checkpoints=["/TV_{}".format(self.network_file)]

        #load data:
        ubm = sidekit.Mixture()
        ubm.read(get_experiment_nets()+'/ubm_{}.h5'.format(self.network_file))
        ubm_list, test_list_long = self.load_data(speaker_list,os.path.splitext(os.path.split(self.get_validation_train_data())[1])[0])
        ubm_list, test_list_short = self.load_data(speaker_list,os.path.splitext(os.path.split(self.get_validation_test_data())[1])[0])
        tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(get_experiment_nets()+"/TV_{}".format(self.network_file))

        fs = sidekit.FeaturesServer(feature_filename_structure=(
                "{dir}/{speaker_list}/feat/{{}}.{ext}".format(dir=get_training('i_vector'), speaker_list=speaker_list,
                                                              ext=feature_extension)),
            dataset_list=["energy", "cep", "vad"],
            mask="[0-12]",
            feat_norm="cmvn",
            keep_all_features=True,
            delta=True,
            double_delta=True,
            rasta=True,
            context=None)
github Anwarvic / Speaker-Recognition / i-vector.py View on Github external
def train_tv(self):
        """
        This method is used to train the Total Variability (TV) matrix
        and save it into 'ivector' directory !! 
        """
        # Create status servers
        self.__create_stats()

        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "stat", tv_filename),
                                    ubm,
                                    tv_rank=self.TV_RANK,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath
github Anwarvic / Speaker-Recognition / i-vector.py View on Github external
# Create a joint StatServer for TV and PLDA training data
            back_idmap = plda_idmap.merge(tv_idmap)
            if not back_idmap.validate():
                raise RuntimeError("Error merging tv_idmap & plda_idmap")
        
        # Check UBM model
        ubm_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm_path = os.path.join(self.BASE_DIR, "ubm", ubm_name)
        if not os.path.exists(ubm_path):
            #if UBM model does not exist, train one
            logging.info("Training UBM-{} model".format(self.NUM_GAUSSIANS))
            ubm = UBM(self.conf_path)
            ubm.train()
        #load trained UBM model
        logging.info("Loading trained UBM-{} model".format(self.NUM_GAUSSIANS))
        ubm = sidekit.Mixture()
        ubm.read(ubm_path)
        back_stat = sidekit.StatServer( statserver_file_name=back_idmap, 
                                        ubm=ubm
                                      )
        # Create Feature Server
        fs = self.createFeatureServer()
        
        # Jointly compute the sufficient statistics of TV and (if enabled) PLDA data
        back_filename = 'back_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", back_filename)):
            #BUG: don't use self.NUM_THREADS when assgining num_thread
            # as it's prune to race-conditioning
            back_stat.accumulate_stat(
                ubm=ubm,
                feature_server=fs,
                seg_indices=range(back_stat.segset.shape[0])
github Anwarvic / Speaker-Recognition / i-vector.py View on Github external
def evaluate(self, explain=True):
        """
        This method is used to score our trained model. 
        """
        # Load UBM model
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

        # Load TV matrix
        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser(outputPath+".h5")

        # Extract i-vectors from enrollment data
        logging.info("Extracting i-vectors from enrollment data")
        filename = 'enroll_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        enroll_stat = sidekit.StatServer.read(os.path.join(self.BASE_DIR, 'stat', filename))
        enroll_iv = fa.extract_ivectors_single( ubm=ubm,
                                                stat_server=enroll_stat,
                                                uncertainty=False
                                              )
github Anwarvic / Speaker-Recognition / ubm.py View on Github external
- read the test_ndx file that contains the test set
        - read the trained UBM model, and trained parameters (enroll_stat file)
        - evaluate the test set using gmm_scoring and write the scores
        - if explain=True, write the scores in a more readible way
        Args:
            explain (boolean): If True, write another text file that contain
            the same information as the one within ubm_scores file but in a 
            readible way.
        """
        ############################# READING ############################
        # Create Feature server
        server = self.createFeatureServer()
        # Read the index for the test datas
        test_ndx = sidekit.Ndx.read(os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
        # Read the UBM model
        ubm = sidekit.Mixture()
        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
        filename = "enroll_stat_{}.h5".format(self.NUM_GAUSSIANS)
        enroll_stat = sidekit.StatServer.read(os.path.join(self.BASE_DIR, "stat", filename))
        # MAP adaptation of enrollment speaker models
        enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm=ubm,
                                                            r=3 # MAP regulation factor
                                                           )

        ############################ Evaluating ###########################
        # Compute scores
        scores_gmm_ubm = sidekit.gmm_scoring(ubm=ubm,
                                             enroll=enroll_sv,
                                             ndx=test_ndx,
                                             feature_server=server,
                                             num_thread=self.NUM_THREADS