How to use the gseapy.parser.gsea_cls_parser function in gseapy

To help you get started, we’ve selected a few gseapy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zqfang / GSEApy / gseapy / gsea.py View on Github external
assert self.fignum > 0
        import glob
        from bs4 import BeautifulSoup

        # parsing files.......
        try:
            results_path = glob.glob(self.indir+'*/edb/results.edb')[0]
            rank_path =  glob.glob(self.indir+'*/edb/*.rnk')[0]
            gene_set_path =  glob.glob(self.indir+'*/edb/gene_sets.gmt')[0]
        except IndexError as e:
            sys.stderr.write("Could not locate GSEA files in the given directory!")
            sys.exit(1)
        # extract sample names from .cls file
        cls_path = glob.glob(self.indir+'*/edb/*.cls')
        if cls_path:
            pos, neg, classes = gsea_cls_parser(cls_path[0])
        else:
            # logic for prerank results
            pos, neg = '',''
        # start reploting
        self.gene_sets=gene_set_path
        # obtain gene sets
        gene_set_dict = self.parse_gmt(gmt=gene_set_path)
        # obtain rank_metrics
        rank_metric = self._load_ranking(rank_path)
        correl_vector =  rank_metric.values
        gene_list = rank_metric.index.values
        # extract each enriment term in the results.edb files and plot.
        database = BeautifulSoup(open(results_path), features='xml')
        length = len(database.findAll('DTG'))
        fig_num = self.fignum if self.fignum <= length else length
        for idx in range(fig_num):
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def run(self):
        """GSEA main procedure"""

        assert self.permutation_type in ["phenotype", "gene_set"]
        assert self.min_size <= self.max_size

        # Start Analysis
        self._logger.info("Parsing data files for GSEA.............................")
        # phenotype labels parsing
        phenoPos, phenoNeg, cls_vector = gsea_cls_parser(self.classes)
        # select correct expression genes and values.
        dat = self.load_data(cls_vector)
        # data frame must have length > 1
        assert len(dat) > 1
        # ranking metrics calculation.
        dat2 = ranking_metric(df=dat, method=self.method, pos=phenoPos, neg=phenoNeg,
                              classes=cls_vector, ascending=self.ascending)
        self.ranking = dat2
        # filtering out gene sets and build gene sets dictionary
        gmt = self.load_gmt(gene_list=dat2.index.values, gmt=self.gene_sets)

        self._logger.info("%04d gene_sets used for further statistical testing....."% len(gmt))
        self._logger.info("Start to run GSEA...Might take a while..................")
        # cpu numbers
        self._set_cores()
        # compute ES, NES, pval, FDR, RES
github zqfang / GSEApy / gseapy / run.py View on Github external
:return: | a dictionary where key is a gene set and values are:
        | { es: enrichment score, 
        | nes: normalized enrichment score, 
        | p: P-value, 
        | fdr: FDR, 
        | size: gene set size,
        | matched_size: genes matched to the data, 
        | genes: gene names from the data set }

    """
    assert len(data) > 1
    assert permutation_type in ["phenotype", "gene_set"]
    
    data = pd.read_table(data)
    classes = gsea_cls_parser(cls)[2]
    gmt = gsea_gmt_parser(gene_sets)
    gmt.sort()
    #Ecompute ES, NES, pval, FDR, RES
    if rank_metric is None:
        dat = ranking_metric(data,method= method,classes = classes ,ascending=ascending)
        results,hit_ind,RES = gsea_compute(data = dat, gene_list = None,rankings = None,
                    n=permutation_n,gmt = gmt, weighted_score_type=weighted_score_type,
                    permutation_type=permutation_type)
    else:
        dat = pd.read_table(rank_metric)
        results,hit_ind,RES = gsea_compute(data = None, gene_list = rank_metric['gene_name'],rankings = rank_metric['rank'].values,
                                           n=permutation_n,gmt = gmt, weighted_score_type=weighted_score_type,
                                           permutation_type=permutation_type)
    
    res = {}