How to use the gseapy.utils.mkdirs function in gseapy

To help you get started, we’ve selected a few gseapy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zqfang / GSEApy / gseapy / gsea.py View on Github external
def __init__(
        self,
        indir,
        outdir='GSEApy_Replot',
        weighted_score_type=1,
        min_size=3,
        max_size=1000,
        figsize=(6.5, 6),
        graph_num=20,
        format='pdf',
        verbose=False,
):
        """Store the replot settings and set up the run logger.

        The arguments are stored on the instance under the same names, except
        that ``graph_num`` is stored as ``self.fignum`` (coerced with ``int``)
        and ``verbose`` is coerced with ``bool``.

        Side effects: creates ``outdir`` via ``mkdirs`` and initialises a
        logger writing to ``<outdir>/gseapy.replot.run.log``. The log level is
        INFO when ``verbose`` is truthy and WARNING otherwise.
        """
        # user-supplied settings
        self.indir = indir
        self.outdir = outdir
        self.weighted_score_type = weighted_score_type
        self.min_size = min_size
        self.max_size = max_size
        self.figsize = figsize
        self.fignum = int(graph_num)
        self.format = format
        self.verbose = bool(verbose)

        # fixed settings for this module
        self.module = 'replot'
        self.gene_sets = None
        self.ascending = False

        # the output directory must exist before the log file is placed in it
        mkdirs(self.outdir)
        log_name = "gseapy.%s.%s.log"%(self.module,"run")
        log_path = os.path.join(self.outdir, log_name)
        if self.verbose:
            level = logging.INFO
        else:
            level = logging.WARNING
        self._logger = log_init(outlog=log_path, log_level=level)
    def run(self):
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def prepare_outdir(self):
        """Prepare the output directory and return the log file path.

        The value of ``self.outdir`` on entry is saved in ``self._outdir``.
        When it is None, a ``TemporaryDirectory`` is created, kept on
        ``self._tmpdir``, and ``self.outdir`` is pointed at its path. When it
        is a string, the directory is created with ``mkdirs``.

        The log file name is built from ``self.module`` and a gene-set label:
        the lower-cased base name of ``self.gene_sets`` with a trailing
        ``.gmt`` extension removed when it is a string, or ``"blank_name"``
        when it is a dict.

        Returns:
            Path of the log file inside ``self.outdir``.

        Raises:
            Exception: if ``self.outdir`` is neither None nor a string, or if
                ``self.gene_sets`` is neither a string nor a dict.
        """
        self._outdir = self.outdir
        if self._outdir is None:
            # Hold the TemporaryDirectory object on the instance; the stdlib
            # removes the directory once that object is cleaned up.
            self._tmpdir = TemporaryDirectory()
            self.outdir = self._tmpdir.name
        elif isinstance(self.outdir, str):
            mkdirs(self.outdir)
        else:
            raise Exception("Error parsing outdir: %s"%type(self.outdir))

        # handle gmt type
        if isinstance(self.gene_sets, str):
            _gset = os.path.split(self.gene_sets)[-1].lower()
            # Remove the extension as a suffix. str.rstrip(".gmt") would strip
            # any trailing '.', 'g', 'm' or 't' characters instead, turning
            # e.g. "kegg.gmt" into "ke".
            if _gset.endswith(".gmt"):
                _gset = _gset[:-len(".gmt")]
        elif isinstance(self.gene_sets, dict):
            _gset = "blank_name"
        else:
            raise Exception("Error parsing gene_sets parameter for gene sets")

        logfile = os.path.join(self.outdir, "gseapy.%s.%s.log" % (self.module, _gset))
        return logfile
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def runSamplesPermu(self, df, gmt=None):
        """Single Sample GSEA workflow with permutation procedure"""

        assert self.min_size <= self.max_size
        mkdirs(self.outdir)
        self.resultsOnSamples = OrderedDict()
        outdir = self.outdir
        # iter through each sample
        for name, ser in df.iteritems():
            self.outdir = os.path.join(outdir, str(name))
            self._logger.info("Run Sample: %s " % name)
            mkdirs(self.outdir)
            # sort ranking values from high to low or reverse
            dat2 = ser.sort_values(ascending=self.ascending)
            # reset integer index, or caused unwanted problems
            # df.reset_index(drop=True, inplace=True)

            # compute ES, NES, pval, FDR, RES
            gsea_results, hit_ind,rank_ES, subsets = gsea_compute(data=dat2, n=self.permutation_num, gmt=gmt,
                                                                  weighted_score_type=self.weighted_score_type,
                                                                  permutation_type='gene_set', method=None,
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def runSamplesPermu(self, df, gmt=None):
        """Single Sample GSEA workflow with permutation procedure"""

        assert self.min_size <= self.max_size
        mkdirs(self.outdir)
        self.resultsOnSamples = OrderedDict()
        outdir = self.outdir
        # iter through each sample
        for name, ser in df.iteritems():
            self.outdir = os.path.join(outdir, str(name))
            self._logger.info("Run Sample: %s " % name)
            mkdirs(self.outdir)
            # sort ranking values from high to low or reverse
            dat2 = ser.sort_values(ascending=self.ascending)
            # reset integer index, or caused unwanted problems
            # df.reset_index(drop=True, inplace=True)

            # compute ES, NES, pval, FDR, RES
            gsea_results, hit_ind,rank_ES, subsets = gsea_compute(data=dat2, n=self.permutation_num, gmt=gmt,
                                                                  weighted_score_type=self.weighted_score_type,
                                                                  permutation_type='gene_set', method=None,
                                                                  pheno_pos='', pheno_neg='',
                                                                  classes=None, ascending=self.ascending,
                                                                  processes=self._processes,
                                                                  seed=self.seed, single=True, scale=self.scale)

            # write file
            res_zip = zip(subsets, list(gsea_results), hit_ind, rank_ES)
github zqfang / GSEApy / gseapy / gsea.py View on Github external
# apply_async
            tempes.append(pool.apply_async(enrichment_score_tensor,
                                           args=(genes_sorted, cor_vec, gmt,
                                               self.weighted_score_type,
                                               self.permutation_num, rs, True,
                                               self.scale)))
        pool.close()
        pool.join()
        # save results and plotting
        for i, temp in enumerate(tempes):
            name, rnk = names[i], rankings[i]
            self._logger.info("Calculate Enrichment Score for Sample: %s "%name)
            es, esnull, hit_ind, RES = temp.get()
            # create results subdir
            self.outdir= os.path.join(outdir, str(name))
            mkdirs(self.outdir)
            # save results
            self.resultsOnSamples[name] = pd.Series(data=es, index=subsets, name=name)
            # plotting
            if self._noplot: continue
            self._logger.info("Plotting Sample: %s \n" % name)
            for i, term in enumerate(subsets):
                term = term.replace('/','_').replace(":","_")
                outfile = '{0}/{1}.{2}.{3}'.format(self.outdir, term, self.module, self.format)
                gseaplot(rank_metric=rnk, term=term, 
                         hit_indices=hit_ind[i], nes=es[i], pval=1, fdr=1, 
                         RES=RES[i], pheno_pos='', pheno_neg='', 
                         figsize=self.figsize, ofname=outfile)
        # save es, nes to file
        self._save(outdir)

        return