Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_get_df(self):
    """Exercise fold-change filtering, GMT parsing and the hypergeometric test.

    Loads the example fold-change file and the example GMT file, checks the
    filtered genes and the parsed gene sets, then runs the hypergeometric
    test with thresholding enabled and checks the type and shape of the result.
    """
    # Load and filter the example fold changes, then parse the example GMT.
    fold_changes = read_fold_change_df(FOLD_CHANGES_EXAMPLE)
    significant_genes = filter_p_value(fold_changes)
    parsed_gene_sets = gsea_gmt_parser(GMT_EXAMPLE)

    expected_gene_sets = {
        'pathway1': ['A', 'B', 'C', 'D'],
        'pathway2': ['E', 'F', 'G', 'H'],
    }
    self.assertEqual(significant_genes, {'C', 'A'})
    self.assertEqual(parsed_gene_sets, expected_gene_sets)

    # The enrichment call takes gene sets keyed by (pathway, database) tuples.
    keyed_gene_sets = {
        ('pathway1', 'kegg'): ['A', 'B', 'C', 'D'],
        ('pathway2', 'kegg'): ['E', 'F', 'G', 'H'],
    }
    enriched_pathways_df = perform_hypergeometric_test(
        significant_genes,
        keyed_gene_sets,
        apply_threshold=True,
    )

    self.assertIsInstance(enriched_pathways_df, pd.DataFrame)
    self.assertEqual(enriched_pathways_df.shape, (1, 4))
def test_ssgsea1(ssGCT, geneGMT):
    """Smoke test: run ssgsea with 100 permutations into a temporary directory.

    Only tests that the command runs successfully; it doesn't check the image.
    """
    # The context manager removes the directory under "tests" even when
    # ssgsea raises, so a failing run no longer leaves stray output behind.
    with TemporaryDirectory(dir="tests") as tmpdir:
        ssgsea(ssGCT, geneGMT, tmpdir, permutation_num=100)
def test_ssgsea2(ssGCT, geneGMT):
    """Smoke test: run ssgsea without permutations, with and without an outdir.

    Only tests that the command runs successfully; it doesn't check the image.
    """
    # The context manager removes the directory under "tests" even when
    # ssgsea raises, so a failing run no longer leaves stray output behind.
    with TemporaryDirectory(dir="tests") as tmpdir:
        ssgsea(ssGCT, geneGMT, tmpdir, permutation_num=0)
    # Second run passes None as the output directory.
    ssgsea(ssGCT, geneGMT, None, permutation_num=0)
def test_ssgsea2(ssGCT, geneGMT):
    """Smoke test: run ssgsea without permutations, with and without an outdir.

    Only tests that the command runs successfully; it doesn't check the image.
    """
    # The context manager removes the directory under "tests" even when
    # ssgsea raises, so a failing run no longer leaves stray output behind.
    with TemporaryDirectory(dir="tests") as tmpdir:
        ssgsea(ssGCT, geneGMT, tmpdir, permutation_num=0)
    # Second run passes None as the output directory.
    ssgsea(ssGCT, geneGMT, None, permutation_num=0)
def __init__(self, indir, outdir='GSEApy_Replot', weighted_score_type=1,
             min_size=3, max_size=1000, figsize=(6.5,6), graph_num=20, format='pdf', verbose=False):
    """Store the replot settings and set up the run logger.

    :param indir: input directory, stored as given.
    :param outdir: output directory; passed to ``mkdirs`` and used for the log file.
    :param weighted_score_type: stored as given.
    :param min_size: stored as given.
    :param max_size: stored as given.
    :param figsize: stored as given.
    :param graph_num: converted with ``int`` and stored as ``self.fignum``.
    :param format: stored as given.
    :param verbose: converted with ``bool``; selects INFO instead of WARNING logging.
    """
    # Fixed settings for this module.
    self.module = 'replot'
    self.gene_sets = None
    self.ascending = False

    # Caller-supplied settings.
    self.indir = indir
    self.outdir = outdir
    self.weighted_score_type = weighted_score_type
    self.min_size = min_size
    self.max_size = max_size
    self.figsize = figsize
    self.fignum = int(graph_num)
    self.format = format
    self.verbose = bool(verbose)

    # Initialise the logger; the output directory is set up first because
    # the log file lives inside it.
    mkdirs(self.outdir)
    log_name = "gseapy.%s.%s.log"%(self.module,"run")
    log_path = os.path.join(self.outdir, log_name)
    if self.verbose:
        level = logging.INFO
    else:
        level = logging.WARNING
    self._logger = log_init(outlog=log_path, log_level=level)
def run(self):
def _download_libraries(self, libname):
    """Download an Enrichr gene-set library and cache it as a GMT file.

    Requests the library ``libname`` in text mode from the Enrichr
    ``geneSetLibrary`` endpoint, writes it to
    ``DEFAULT_CACHE_PATH/enrichr.<libname>.gmt`` and returns the parsed sets.

    :param libname: Enrichr library name, substituted into the query string.
    :return: dict mapping each term (first tab-separated field of a line) to
        its list of genes (fields from the third onwards, each cut at the
        first comma).
    :raises Exception: if the HTTP response is not OK.
    """
    self._logger.info("Downloading and generating Enrichr library gene sets......")
    # NOTE(review): retry(5) presumably returns a session that retries failed
    # requests -- confirm against its definition.
    s = retry(5)
    # query string
    ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary'
    query_string = '?mode=text&libraryName=%s'
    # get
    response = s.get(ENRICHR_URL + query_string % libname, timeout=None)
    if not response.ok:
        raise Exception('Error fetching enrichment results, check internet connection first.')
    # reformat to dict and save to disk
    mkdirs(DEFAULT_CACHE_PATH)
    genesets_dict = {}
    outname = "enrichr.%s.gmt" % libname
    # The context manager closes the cache file even if iterating the
    # response or writing a line raises.
    with open(os.path.join(DEFAULT_CACHE_PATH, outname), "w") as gmtout:
        for line in response.iter_lines(chunk_size=1024, decode_unicode='utf-8'):
            fields = line.strip().split("\t")
            k = fields[0]
            if not k:
                # Blank line: would otherwise create a gene set with an empty name.
                continue
            # Keep only the part of each gene field before the first comma.
            v = [gene.split(",")[0] for gene in fields[2:]]
            genesets_dict[k] = v
            gmtout.write("%s\t\t%s\n" % (k, "\t".join(v)))
    return genesets_dict
def prepare_outdir(self):
    """Set up the output directory and return the path of the log file.

    The caller-supplied ``self.outdir`` is remembered in ``self._outdir``.
    When it is None, a ``TemporaryDirectory`` is created (kept on
    ``self._tmpdir``) and ``self.outdir`` is pointed at it; when it is a
    string, it is passed to ``mkdirs``.

    :return: ``<outdir>/gseapy.<module>.<name>.log``, where ``<name>`` is the
        lower-cased file name of ``self.gene_sets`` without a trailing
        ``.gmt`` when gene_sets is a string, or ``blank_name`` when it is a dict.
    :raises Exception: if ``self.outdir`` is neither None nor a string, or
        ``self.gene_sets`` is neither a string nor a dict.
    """
    self._outdir = self.outdir
    if self._outdir is None:
        self._tmpdir = TemporaryDirectory()
        self.outdir = self._tmpdir.name
    elif isinstance(self.outdir, str):
        mkdirs(self.outdir)
    else:
        raise Exception("Error parsing outdir: %s"%type(self.outdir))
    # handle gmt type
    if isinstance(self.gene_sets, str):
        _gset = os.path.split(self.gene_sets)[-1].lower()
        # Remove the extension as a suffix. str.rstrip(".gmt") strips any
        # trailing '.', 'g', 'm' or 't' characters, which turned
        # "kegg.gmt" into "ke".
        suffix = ".gmt"
        if _gset.endswith(suffix):
            _gset = _gset[:-len(suffix)]
    elif isinstance(self.gene_sets, dict):
        _gset = "blank_name"
    else:
        raise Exception("Error parsing gene_sets parameter for gene sets")
    logfile = os.path.join(self.outdir, "gseapy.%s.%s.log" % (self.module, _gset))
    return logfile
assert self.fignum > 0
import glob
from bs4 import BeautifulSoup
# parsing files.......
try:
results_path = glob.glob(self.indir+'*/edb/results.edb')[0]
rank_path = glob.glob(self.indir+'*/edb/*.rnk')[0]
gene_set_path = glob.glob(self.indir+'*/edb/gene_sets.gmt')[0]
except IndexError as e:
sys.stderr.write("Could not locate GSEA files in the given directory!")
sys.exit(1)
# extract sample names from .cls file
cls_path = glob.glob(self.indir+'*/edb/*.cls')
if cls_path:
pos, neg, classes = gsea_cls_parser(cls_path[0])
else:
# logic for prerank results
pos, neg = '',''
# start replotting
self.gene_sets=gene_set_path
# obtain gene sets
gene_set_dict = self.parse_gmt(gmt=gene_set_path)
# obtain rank_metrics
rank_metric = self._load_ranking(rank_path)
correl_vector = rank_metric.values
gene_list = rank_metric.index.values
# extract each enrichment term from the results.edb file and plot it.
database = BeautifulSoup(open(results_path), features='xml')
length = len(database.findAll('DTG'))
fig_num = self.fignum if self.fignum <= length else length
for idx in range(fig_num):
enr = Enrichr(gene_list=args.gene_list, descriptions=args.descrip,
gene_sets=args.library, organism=args.organism,
outdir=args.outdir, format=args.format, cutoff=args.thresh,
background=args.bg, figsize=args.figsize,
top_term=args.term, no_plot=args.noplot, verbose=args.verbose)
enr.run()
elif subcommand == "biomart":
from .parser import Biomart
# read the filter value from an input file or take it as an argument
name, value = args.filter
if os.path.isfile(value):
with open(value, 'r') as val:
lines = val.readlines()
value = [ l.strip() for l in lines]
# run query
bm = Biomart(host=args.host, verbose=args.verbose)
bm.query(dataset=args.bg, attributes=args.attrs.split(","),
filters={name : value}, filename=args.ofile)
else:
argparser.print_help()
sys.exit(0)
def _download_libraries(self, libname):
    """Download an Enrichr gene-set library and cache it as a GMT file.

    Requests the library ``libname`` in text mode from the Enrichr
    ``geneSetLibrary`` endpoint, writes it to
    ``DEFAULT_CACHE_PATH/enrichr.<libname>.gmt`` and returns the parsed sets.

    :param libname: Enrichr library name, substituted into the query string.
    :return: dict mapping each term (first tab-separated field of a line) to
        its list of genes (fields from the third onwards, each cut at the
        first comma).
    :raises Exception: if the HTTP response is not OK.
    """
    self._logger.info("Downloading and generating Enrichr library gene sets......")
    # NOTE(review): retry(5) presumably returns a session that retries failed
    # requests -- confirm against its definition.
    s = retry(5)
    # query string
    ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary'
    query_string = '?mode=text&libraryName=%s'
    # get
    response = s.get(ENRICHR_URL + query_string % libname, timeout=None)
    if not response.ok:
        raise Exception('Error fetching enrichment results, check internet connection first.')
    # reformat to dict and save to disk
    mkdirs(DEFAULT_CACHE_PATH)
    genesets_dict = {}
    outname = "enrichr.%s.gmt" % libname
    # The context manager closes the cache file even if iterating the
    # response or writing a line raises.
    with open(os.path.join(DEFAULT_CACHE_PATH, outname), "w") as gmtout:
        for line in response.iter_lines(chunk_size=1024, decode_unicode='utf-8'):
            fields = line.strip().split("\t")
            k = fields[0]
            if not k:
                # Blank line: would otherwise create a gene set with an empty name.
                continue
            # Keep only the part of each gene field before the first comma.
            v = [gene.split(",")[0] for gene in fields[2:]]
            genesets_dict[k] = v
            gmtout.write("%s\t\t%s\n" % (k, "\t".join(v)))
    return genesets_dict