How to use the gseapy.algorithm.gsea_compute function in gseapy

To help you get started, we’ve selected a few gseapy examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zqfang / GSEApy / gseapy / gsea.py View on Github external
assert self.min_size <= self.max_size
        mkdirs(self.outdir)
        self.resultsOnSamples = OrderedDict()
        outdir = self.outdir
        # iter through each sample
        for name, ser in df.iteritems():
            self.outdir = os.path.join(outdir, str(name))
            self._logger.info("Run Sample: %s " % name)
            mkdirs(self.outdir)
            # sort ranking values from high to low or reverse
            dat2 = ser.sort_values(ascending=self.ascending)
            # reset integer index, otherwise it causes unwanted problems
            # df.reset_index(drop=True, inplace=True)

            # compute ES, NES, pval, FDR, RES
            gsea_results, hit_ind,rank_ES, subsets = gsea_compute(data=dat2, n=self.permutation_num, gmt=gmt,
                                                                  weighted_score_type=self.weighted_score_type,
                                                                  permutation_type='gene_set', method=None,
                                                                  pheno_pos='', pheno_neg='',
                                                                  classes=None, ascending=self.ascending,
                                                                  processes=self._processes,
                                                                  seed=self.seed, single=True, scale=self.scale)

            # write file
            res_zip = zip(subsets, list(gsea_results), hit_ind, rank_ES)
            self._save_results(zipdata=res_zip, outdir=self.outdir, module=self.module,
                                gmt=gmt, rank_metric=dat2, permutation_type="gene_sets")
            self.resultsOnSamples[name] = self.res2d.es
            # plotting
            if self._noplot: continue
            self._logger.info("Plotting Sample: %s \n" % name)
            self._plotting(rank_metric=dat2, results=self.results,
github zqfang / GSEApy / gseapy / run.py View on Github external
| size: gene set size,
        | matched_size: genes matched to the data, 
        | genes: gene names from the data set }

    """
    assert len(data) > 1
    assert permutation_type in ["phenotype", "gene_set"]
    
    data = pd.read_table(data)
    classes = gsea_cls_parser(cls)[2]
    gmt = gsea_gmt_parser(gene_sets)
    gmt.sort()
    # compute ES, NES, pval, FDR, RES
    if rank_metric is None:
        dat = ranking_metric(data,method= method,classes = classes ,ascending=ascending)
        results,hit_ind,RES = gsea_compute(data = dat, gene_list = None,rankings = None,
                    n=permutation_n,gmt = gmt, weighted_score_type=weighted_score_type,
                    permutation_type=permutation_type)
    else:
        dat = pd.read_table(rank_metric)
        results,hit_ind,RES = gsea_compute(data = None, gene_list = rank_metric['gene_name'],rankings = rank_metric['rank'].values,
                                           n=permutation_n,gmt = gmt, weighted_score_type=weighted_score_type,
                                           permutation_type=permutation_type)
    
    res = {}

    for gs, gseale in zip(gmt.keys(), list(results)):
        rdict = {}
        rdict['es'] = gseale[0]
        rdict['nes'] = gseale[1]
        rdict['pval'] = gseale[2]
        rdict['fdr'] = gseale[3]
github zqfang / GSEApy / gseapy / gsea.py View on Github external
# parsing rankings
        dat2 = self._load_ranking(self.rnk)
        assert len(dat2) > 1

        # cpu numbers
        self._set_cores()
        # Start Analysis
        self._logger.info("Parsing data files for GSEA.............................")
        # filtering out gene sets and build gene sets dictionary
        gmt = self.load_gmt(gene_list=dat2.index.values, gmt=self.gene_sets)

        self._logger.info("%04d gene_sets used for further statistical testing....."% len(gmt))
        self._logger.info("Start to run GSEA...Might take a while..................")
        # compute ES, NES, pval, FDR, RES
        gsea_results, hit_ind,rank_ES, subsets = gsea_compute(data=dat2, n=self.permutation_num, gmt=gmt,
                                                              weighted_score_type=self.weighted_score_type,
                                                              permutation_type='gene_set', method=None,
                                                              pheno_pos=self.pheno_pos, pheno_neg=self.pheno_neg,
                                                              classes=None, ascending=self.ascending,
                                                              processes=self._processes, seed=self.seed)
        self._logger.info("Start to generate gseapy reports, and produce figures...")
        res_zip = zip(subsets, list(gsea_results), hit_ind, rank_ES)
        self._save_results(zipdata=res_zip, outdir=self.outdir, module=self.module,
                                   gmt=gmt, rank_metric=dat2, permutation_type="gene_sets")

        # Plotting
        if not self._noplot:
            self._plotting(rank_metric=dat2, results=self.results,
                           graph_num=self.graph_num, outdir=self.outdir,
                           figsize=self.figsize, format=self.format,
                           pheno_pos=self.pheno_pos, pheno_neg=self.pheno_neg)