How to use the sidekit.StatServer.read_subset function in SIDEKIT

To help you get started, we’ve selected a few SIDEKIT examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Anwarvic / Speaker-Recognition / i-vector.py View on Github external
# Jointly compute the sufficient statistics of TV and (if enabled) PLDA data
        back_filename = 'back_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", back_filename)):
            #BUG: don't use self.NUM_THREADS when assigning num_thread
            # as it's prone to race conditions
            back_stat.accumulate_stat(
                ubm=ubm,
                feature_server=fs,
                seg_indices=range(back_stat.segset.shape[0])
                )
            back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))
        
        # Load the sufficient statistics from TV training data
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", tv_filename)):
            tv_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename),
                tv_idmap
                )
            tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))
        
        # Load sufficient statistics and extract i-vectors from PLDA training data
        if self.ENABLE_PLDA:
            plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GAUSSIANS)
            if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", plda_filename)):
                plda_stat = sidekit.StatServer.read_subset(
                    os.path.join(self.BASE_DIR, "stat", back_filename),
                    plda_idmap
                    )
                plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))
        
        # Load sufficient statistics from test data
github Anwarvic / Speaker-Recognition / i-vector.py View on Github external
back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))
        
        # Load the sufficient statistics from TV training data
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", tv_filename)):
            tv_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename),
                tv_idmap
                )
            tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))
        
        # Load sufficient statistics and extract i-vectors from PLDA training data
        if self.ENABLE_PLDA:
            plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GAUSSIANS)
            if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", plda_filename)):
                plda_stat = sidekit.StatServer.read_subset(
                    os.path.join(self.BASE_DIR, "stat", back_filename),
                    plda_idmap
                    )
                plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))
        
        # Load sufficient statistics from test data
        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
            test_idmap = sidekit.IdMap.read(os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
            test_stat = sidekit.StatServer( statserver_file_name=test_idmap, 
                                            ubm=ubm
                                          )
            # Create Feature Server
            fs = self.createFeatureServer()
            # Jointly compute the sufficient statistics of TV and PLDA data
            #BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions