How to use the gseapy.parser.Biomart class in gseapy

To help you get started, we’ve selected a few gseapy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zqfang / GSEApy / gseapy / __main__.py View on GitHub external
enr = Enrichr(gene_list=args.gene_list, descriptions=args.descrip,
                      gene_sets=args.library, organism=args.organism,
                      outdir=args.outdir, format=args.format, cutoff=args.thresh, 
                      background=args.bg, figsize=args.figsize,
                      top_term=args.term, no_plot=args.noplot, verbose=args.verbose)
        enr.run()
    elif subcommand == "biomart":
        from .parser import Biomart
        # read the filter values from an input file, or use the argument as given
        name, value = args.filter
        if os.path.isfile(value):
            with open(value, 'r') as val:
                lines = val.readlines()
            value = [ l.strip() for l in lines]
        # run query
        bm = Biomart(host=args.host, verbose=args.verbose)
        bm.query(dataset=args.bg, attributes=args.attrs.split(","), 
                 filters={name : value}, filename=args.ofile)
    else:
        argparser.print_help()
        sys.exit(0)
github zqfang / GSEApy / gseapy / enrichr.py View on GitHub external
with open(self.background) as b:
                bg2 = b.readlines() 
            bg = [g.strip() for g in bg2]  
            return set(bg)
        
        # package included data
        DB_FILE = resource_filename("gseapy", "data/{}.background.genes.txt".format(self.background))
        filename = os.path.join(DEFAULT_CACHE_PATH, "{}.background.genes.txt".format(self.background))  
        if os.path.exists(filename):
            df = pd.read_csv(filename,sep="\t")
        elif os.path.exists(DB_FILE):
            df = pd.read_csv(DB_FILE,sep="\t")
        else:
            # background is a biomart database name
            self._logger.warning("Downloading %s for the first time. It might take a couple of miniutes."%self.background)
            bm = Biomart()
            df = bm.query(dataset=self.background)
            df.dropna(subset=['go_id'], inplace=True)
        self._logger.info("using all annotated genes with GO_ID as background genes")
        df.dropna(subset=['entrezgene'], inplace=True)     
        # input id type: entrez or gene_name
        if self._isezid:
            bg = df['entrezgene'].astype(int)
        else:
            bg = df['external_gene_name']

        return set(bg)
github zqfang / GSEApy / gseapy / parser.py View on GitHub external
# while (self.host is None) and (i < 3):
        #     self.host = self.ghosts[i]
        #     i +=1 
        self.new_query()
        # 'mmusculus_gene_ensembl'
        self.add_dataset_to_xml(dataset)
        for at in attributes:
            self.add_attribute_to_xml(at)
        # add filters
        if filters:
            for k, v in filters.items(): 
                if isinstance(v, list): v = ",".join(v)
                self.add_filter_to_xml(k, v)

        xml_query = self.get_xml()
        results = super(Biomart, self).query(xml_query)
        df = pd.read_csv(StringIO(results), header=None, sep="\t",
                         names=attributes, index_col=None)
        # save file to cache path.
        if filename is None: 
            mkdirs(DEFAULT_CACHE_PATH)
            filename = os.path.join(DEFAULT_CACHE_PATH, "{}.background.genes.txt".format(dataset))
        df.to_csv(filename, sep="\t", index=False)
      
        return df