How to use gseapy - 10 common examples

To help you get started, we’ve selected a few gseapy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pathwayforte / pathway-forte / tests / test_ora.py View on Github external
def test_get_df(self):
        """Run the example fold-change and GMT files through the ORA helpers.

        Checks the filtered gene set, the parsed gene sets, and the type and
        shape of the frame returned by ``perform_hypergeometric_test``.
        """
        # Gene lists expected for the two pathways in the example GMT file.
        pathway1_genes = ['A', 'B', 'C', 'D']
        pathway2_genes = ['E', 'F', 'G', 'H']

        fold_changes = read_fold_change_df(FOLD_CHANGES_EXAMPLE)
        hits = filter_p_value(fold_changes)
        parsed_gmt = gsea_gmt_parser(GMT_EXAMPLE)

        self.assertEqual(hits, {'C', 'A'})
        self.assertEqual(
            parsed_gmt,
            {'pathway1': pathway1_genes, 'pathway2': pathway2_genes},
        )

        # The test keys its gene sets by (pathway, database) tuples; pass
        # fresh list copies so the expected values above stay untouched.
        keyed_gene_sets = {
            ('pathway1', 'kegg'): list(pathway1_genes),
            ('pathway2', 'kegg'): list(pathway2_genes),
        }
        result = perform_hypergeometric_test(
            hits,
            keyed_gene_sets,
            apply_threshold=True,
        )

        self.assertIsInstance(result, pd.DataFrame)
        self.assertEqual(result.shape, (1, 4))
github zqfang / GSEApy / tests / test_commands.py View on Github external
def test_ssgsea1(ssGCT, geneGMT):
    """Smoke-test ``ssgsea`` with ``permutation_num=100``.

    Only checks that the command runs without raising; the generated
    output (e.g. images) is not inspected.
    """
    # The context manager removes the directory even if ssgsea raises,
    # so a failing run does not leave stray folders under "tests".
    with TemporaryDirectory(dir="tests") as outdir:
        ssgsea(ssGCT, geneGMT, outdir, permutation_num=100)
github zqfang / GSEApy / tests / test_commands.py View on Github external
def test_ssgsea2(ssGCT, geneGMT):
    """Smoke-test ``ssgsea`` with ``permutation_num=0``, with and without an outdir.

    Only checks that the command runs without raising; the generated
    output (e.g. images) is not inspected.
    """
    # The context manager removes the directory even if ssgsea raises,
    # so a failing run does not leave stray folders under "tests".
    with TemporaryDirectory(dir="tests") as outdir:
        ssgsea(ssGCT, geneGMT, outdir, permutation_num=0)
    # Second run passes None as the output directory.
    ssgsea(ssGCT, geneGMT, None, permutation_num=0)
github zqfang / GSEApy / tests / test_commands.py View on Github external
def test_ssgsea2(ssGCT, geneGMT):
    """Smoke-test ``ssgsea`` with ``permutation_num=0``, with and without an outdir.

    Only checks that the command runs without raising; the generated
    output (e.g. images) is not inspected.
    """
    # The context manager removes the directory even if ssgsea raises,
    # so a failing run does not leave stray folders under "tests".
    with TemporaryDirectory(dir="tests") as outdir:
        ssgsea(ssGCT, geneGMT, outdir, permutation_num=0)
    # Second run passes None as the output directory.
    ssgsea(ssGCT, geneGMT, None, permutation_num=0)
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def __init__(self, indir, outdir='GSEApy_Replot', weighted_score_type=1,
             min_size=3, max_size=1000, figsize=(6.5,6), graph_num=20,
             format='pdf', verbose=False):
        """Store the replot settings and set up the run logger.

        All arguments are kept on the instance under the same name, except
        ``graph_num`` which is stored as ``self.fignum`` (cast to int) and
        ``verbose`` which is cast to bool. Creates ``outdir`` via ``mkdirs``
        and opens a log file inside it.
        """
        # settings taken straight from the arguments
        self.indir, self.outdir = indir, outdir
        self.weighted_score_type = weighted_score_type
        self.min_size, self.max_size = min_size, max_size
        self.figsize = figsize
        self.fignum = int(graph_num)
        self.format = format
        self.verbose = bool(verbose)

        # fixed values for this module
        self.module = 'replot'
        self.gene_sets = None
        self.ascending = False

        # init logger: INFO when verbose, WARNING otherwise
        mkdirs(self.outdir)
        log_name = "gseapy.%s.%s.log" % (self.module, "run")
        log_path = os.path.join(self.outdir, log_name)
        if self.verbose:
            level = logging.INFO
        else:
            level = logging.WARNING
        self._logger = log_init(outlog=log_path, log_level=level)
    def run(self):
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def _download_libraries(self, libname):
        """Download an Enrichr gene-set library and cache it as a GMT file.

        The library text is fetched from the Enrichr ``geneSetLibrary``
        endpoint, written to ``DEFAULT_CACHE_PATH/enrichr.<libname>.gmt`` and
        returned as a dict.

        :param libname: library name substituted into the ``libraryName``
                        query parameter.
        :return: dict mapping the first tab-separated field of each line to
                 the list of genes taken from the third field onwards.
        :raises Exception: if the HTTP response is not OK.
        """
        self._logger.info("Downloading and generating Enrichr library gene sets......")
        # NOTE(review): retry(5) presumably returns a session-like object
        # with retries configured; only its .get() is used here.
        s = retry(5)
        # query string
        ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary'
        query_string = '?mode=text&libraryName=%s'
        # get
        response = s.get( ENRICHR_URL + query_string % libname, timeout=None)
        if not response.ok:
            raise Exception('Error fetching enrichment results, check internet connection first.')
        # reformat to dict and save to disk
        mkdirs(DEFAULT_CACHE_PATH)
        genesets_dict = {}
        outname = "enrichr.%s.gmt"%libname
        # 'with' guarantees the cache file is closed even if parsing fails.
        with open(os.path.join(DEFAULT_CACHE_PATH, outname), "w") as gmtout:
            for line in response.iter_lines(chunk_size=1024, decode_unicode='utf-8'):
                # iter_lines may still yield bytes when the response carries
                # no encoding; decode so the str operations below work.
                if isinstance(line, bytes):
                    line = line.decode('utf-8')
                line = line.strip()
                if not line:
                    # skip blank lines instead of recording an empty-named set
                    continue
                fields = line.split("\t")
                k = fields[0]
                # each gene entry may carry ",<extra>"; keep the part before the comma
                v = [gene.split(",")[0] for gene in fields[2:]]
                genesets_dict[k] = v
                gmtout.write("%s\t\t%s\n" % (k, "\t".join(v)))

        return genesets_dict
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def prepare_outdir(self):
        """Prepare the output directory and return the log-file path.

        The original ``self.outdir`` value is remembered in ``self._outdir``.
        When it is None a ``TemporaryDirectory`` is created (kept on
        ``self._tmpdir``) and ``self.outdir`` is pointed at it; when it is a
        string the directory is created with ``mkdirs``.

        :return: path ``<outdir>/gseapy.<module>.<gene set name>.log`` where
                 the gene set name is the lower-cased file name of
                 ``self.gene_sets`` without a trailing ``.gmt``, or
                 ``"blank_name"`` when ``self.gene_sets`` is a dict.
        :raises Exception: if ``self.outdir`` is neither None nor a string,
                           or ``self.gene_sets`` is neither a string nor a dict.
        """
        self._outdir = self.outdir
        if self._outdir is None:
            self._tmpdir = TemporaryDirectory()
            self.outdir = self._tmpdir.name
        elif isinstance(self.outdir, str):
            mkdirs(self.outdir)
        else:
            raise Exception("Error parsing outdir: %s"%type(self.outdir))

        # handle gmt type
        if isinstance(self.gene_sets, str):
            _gset = os.path.split(self.gene_sets)[-1].lower()
            # Remove the ".gmt" suffix explicitly: str.rstrip(".gmt") strips
            # any trailing '.', 'g', 'm', 't' characters and would turn
            # "kegg.gmt" into "ke".
            if _gset.endswith(".gmt"):
                _gset = _gset[:-len(".gmt")]
        elif isinstance(self.gene_sets, dict):
            _gset = "blank_name"
        else:
            raise Exception("Error parsing gene_sets parameter for gene sets")

        logfile = os.path.join(self.outdir, "gseapy.%s.%s.log" % (self.module, _gset))
        return logfile
github zqfang / GSEApy / gseapy / gsea.py View on Github external
assert self.fignum > 0
        import glob
        from bs4 import BeautifulSoup

        # parsing files.......
        try:
            results_path = glob.glob(self.indir+'*/edb/results.edb')[0]
            rank_path =  glob.glob(self.indir+'*/edb/*.rnk')[0]
            gene_set_path =  glob.glob(self.indir+'*/edb/gene_sets.gmt')[0]
        except IndexError as e:
            sys.stderr.write("Could not locate GSEA files in the given directory!")
            sys.exit(1)
        # extract sample names from .cls file
        cls_path = glob.glob(self.indir+'*/edb/*.cls')
        if cls_path:
            pos, neg, classes = gsea_cls_parser(cls_path[0])
        else:
            # logic for prerank results
            pos, neg = '',''
        # start reploting
        self.gene_sets=gene_set_path
        # obtain gene sets
        gene_set_dict = self.parse_gmt(gmt=gene_set_path)
        # obtain rank_metrics
        rank_metric = self._load_ranking(rank_path)
        correl_vector =  rank_metric.values
        gene_list = rank_metric.index.values
        # extract each enriment term in the results.edb files and plot.
        database = BeautifulSoup(open(results_path), features='xml')
        length = len(database.findAll('DTG'))
        fig_num = self.fignum if self.fignum <= length else length
        for idx in range(fig_num):
github zqfang / GSEApy / gseapy / __main__.py View on Github external
enr = Enrichr(gene_list=args.gene_list, descriptions=args.descrip,
                      gene_sets=args.library, organism=args.organism,
                      outdir=args.outdir, format=args.format, cutoff=args.thresh, 
                      background=args.bg, figsize=args.figsize,
                      top_term=args.term, no_plot=args.noplot, verbose=args.verbose)
        enr.run()
    elif subcommand == "biomart":
        from .parser import Biomart
        # read input file or a argument
        name, value = args.filter
        if os.path.isfile(value):
            with open(value, 'r') as val:
                lines = val.readlines()
            value = [ l.strip() for l in lines]
        # run query
        bm = Biomart(host=args.host, verbose=args.verbose)
        bm.query(dataset=args.bg, attributes=args.attrs.split(","), 
                 filters={name : value}, filename=args.ofile)
    else:
        argparser.print_help()
        sys.exit(0)
github zqfang / GSEApy / gseapy / gsea.py View on Github external
def _download_libraries(self, libname):
        """Download an Enrichr gene-set library and cache it as a GMT file.

        The library text is fetched from the Enrichr ``geneSetLibrary``
        endpoint, written to ``DEFAULT_CACHE_PATH/enrichr.<libname>.gmt`` and
        returned as a dict.

        :param libname: library name substituted into the ``libraryName``
                        query parameter.
        :return: dict mapping the first tab-separated field of each line to
                 the list of genes taken from the third field onwards.
        :raises Exception: if the HTTP response is not OK.
        """
        self._logger.info("Downloading and generating Enrichr library gene sets......")
        # NOTE(review): retry(5) presumably returns a session-like object
        # with retries configured; only its .get() is used here.
        s = retry(5)
        # query string
        ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary'
        query_string = '?mode=text&libraryName=%s'
        # get
        response = s.get( ENRICHR_URL + query_string % libname, timeout=None)
        if not response.ok:
            raise Exception('Error fetching enrichment results, check internet connection first.')
        # reformat to dict and save to disk
        mkdirs(DEFAULT_CACHE_PATH)
        genesets_dict = {}
        outname = "enrichr.%s.gmt"%libname
        # 'with' guarantees the cache file is closed even if parsing fails.
        with open(os.path.join(DEFAULT_CACHE_PATH, outname), "w") as gmtout:
            for line in response.iter_lines(chunk_size=1024, decode_unicode='utf-8'):
                # iter_lines may still yield bytes when the response carries
                # no encoding; decode so the str operations below work.
                if isinstance(line, bytes):
                    line = line.decode('utf-8')
                line = line.strip()
                if not line:
                    # skip blank lines instead of recording an empty-named set
                    continue
                fields = line.split("\t")
                k = fields[0]
                # each gene entry may carry ",<extra>"; keep the part before the comma
                v = [gene.split(",")[0] for gene in fields[2:]]
                genesets_dict[k] = v
                gmtout.write("%s\t\t%s\n" % (k, "\t".join(v)))

        return genesets_dict