def start_toil(job, dataset_name, name="bsa"):
    # Schedule a BSA job for every CDD superfamily that has an observed interactome on disk.
    path = os.path.join(get_interfaces_path(dataset_name), "by_superfamily")
    for cdd, sfam_id in iter_cdd(use_id=True, group_superfam=True):
        sfam_path = os.path.join(path, str(int(sfam_id)), str(int(sfam_id)))
        if not os.path.isfile(sfam_path + ".observed_interactome"):
            continue
        cjob = job.addChildJobFn(observed_bsa, dataset_name, sfam_id)
        if not os.path.isfile(sfam_path + ".inferred_interactome"):
            continue
        # Run the inferred-interactome BSA only after the observed-interactome job finishes.
        cjob.addFollowOnJobFn(inferred_bsa, dataset_name, sfam_id)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"
    dataset_name = options.jobStore.split(":")[-1]

    job = Job.wrapJobFn(start_toil, dataset_name)
    with Toil(options) as toil:
        toil.start(job)
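The snippet above follows the standard Toil pattern: a root job function receives a job handle, schedules children with addChildJobFn and follow-ons with addFollowOnJobFn, and the __main__ block wraps the root function and hands it to the Toil context manager. Below is a minimal, self-contained sketch of the same pattern; the job bodies are hypothetical placeholders, not the observed_bsa/inferred_bsa functions from the original project.

from toil.common import Toil
from toil.job import Job

def child_work(job, item):
    # Hypothetical child job body; runs on a worker.
    job.fileStore.logToMaster("processing {}".format(item))

def follow_on_work(job, item):
    # Hypothetical follow-on; runs only after child_work for `item` has finished.
    job.fileStore.logToMaster("post-processing {}".format(item))

def root(job, items):
    for item in items:
        cjob = job.addChildJobFn(child_work, item)
        cjob.addFollowOnJobFn(follow_on_work, item)

if __name__ == "__main__":
    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    with Toil(options) as toil:
        toil.start(Job.wrapJobFn(root, ["a", "b", "c"]))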
def main(args):
    parser = build_parser()
    Job.Runner.addToilOptions(parser)
    options = parser.parse_args()

    inputs = {'numWorkers': options.num_nodes - 1,
              'outDir': options.output_directory,
              'bamName': options.input_file_name,
              'knownSNPs': options.known_SNPs,
              'driverMemory': options.driver_memory,
              'executorMemory': options.executor_memory,
              'sudo': options.sudo,
              'suffix': None}

    Job.Runner.startToil(Job.wrapJobFn(start_master, inputs), options)
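Here build_parser() and start_master come from the surrounding Spark/ADAM pipeline script and are not shown. The sketch below is only an illustration of how such pieces typically fit together with Job.Runner.addToilOptions and Job.Runner.startToil; the option names and job body are hypothetical.

import argparse
from toil.job import Job

def build_parser():
    # Illustrative parser; the real script defines many more options.
    parser = argparse.ArgumentParser(description="Run the pipeline with Toil")
    parser.add_argument('--num-nodes', dest='num_nodes', type=int, default=2)
    parser.add_argument('--output-directory', dest='output_directory', required=True)
    return parser

def start_master(job, inputs):
    # Hypothetical stand-in for the real leader job.
    job.fileStore.logToMaster("inputs: {}".format(inputs))

def main():
    parser = build_parser()
    Job.Runner.addToilOptions(parser)   # adds the job store argument plus all Toil flags
    options = parser.parse_args()
    inputs = {'numWorkers': options.num_nodes - 1, 'outDir': options.output_directory}
    Job.Runner.startToil(Job.wrapJobFn(start_master, inputs), options)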
def __init__(self, message):
    Job.__init__(self, memory="1G", cores=2, disk="2G")
    self.message = message
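The constructor above belongs to a Job subclass; the class body and run method fall outside the snippet. A minimal sketch of the full pattern follows (the class name, log message, and job store path are illustrative):

from toil.common import Toil
from toil.job import Job

class HelloWorld(Job):
    def __init__(self, message):
        # Resource requirements are declared when the job object is constructed.
        Job.__init__(self, memory="1G", cores=2, disk="2G")
        self.message = message

    def run(self, fileStore):
        # run() executes on a worker; its return value becomes the job's promise.
        fileStore.logToMaster("message: {}".format(self.message))
        return self.message

if __name__ == "__main__":
    options = Job.Runner.getDefaultOptions("./toilWorkflowRun")
    with Toil(options) as toil:
        print(toil.start(HelloWorld("bsa")))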
def runCactusProgressive(seqFile,
                         configFile,
                         toilDir,
                         logLevel=None, retryCount=0,
                         batchSystem="single_machine",
                         rescueJobFrequency=None,
                         skipAlignments=False,
                         buildHal=True,
                         buildAvgs=False,
                         toilStats=False,
                         maxCpus=None):
    opts = Job.Runner.getDefaultOptions(toilDir)
    opts.batchSystem = batchSystem if batchSystem is not None else opts.batchSystem
    opts.logLevel = logLevel if logLevel is not None else opts.logLevel
    opts.maxCores = maxCpus if maxCpus is not None else opts.maxCores
    # Used for tests
    opts.scale = 0.1
    opts.retryCount = retryCount if retryCount is not None else opts.retryCount
    # This *shouldn't* be necessary, but it looks like the toil
    # deadlock-detection still has issues.
    opts.deadlockWait = 3600
    opts.buildHal = buildHal
    opts.buildAvgs = buildAvgs
    opts.buildFasta = True
    if toilStats:
        opts.stats = True
    opts.seqFile = seqFile
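runCactusProgressive only populates a Toil options object in the lines shown; the step that actually launches the workflow lies outside the snippet. Below is a hedged sketch of how such an options object is commonly consumed, using a hypothetical placeholder in place of Cactus' real root job.

from toil.common import Toil
from toil.job import Job

def progressive_placeholder(job, opts):
    # Hypothetical stand-in for the real root job of the alignment workflow.
    job.fileStore.logToMaster("seqFile: {}".format(opts.seqFile))

def launch(opts):
    with Toil(opts) as toil:
        if opts.restart:
            # Resume a previous run that used the same toilDir.
            return toil.restart()
        return toil.start(Job.wrapJobFn(progressive_placeholder, opts))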
Borrows heavily from the argparse documentation examples:
"""

# Construct the parser (which is stored in parser)
# Module docstring lives in __doc__
# See http://python-forum.com/pythonforum/viewtopic.php?f=3&t=36847
# And a formatter class so our examples in the docstring look good. Isn't it
# convenient how we already wrapped it to 80 characters?
# See http://docs.python.org/library/argparse.html#formatter-class
parser = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)

# Add the Toil options so the job store is the first argument
Job.Runner.addToilOptions(parser)

# General options
parser.add_argument("--reference_metadata",
                    default="ftp://ftp.ncbi.nlm.nih.gov/genomes/all/"
                            "GCA_000001405.17_GRCh38.p2/"
                            "GCA_000001405.17_GRCh38.p2_assembly_structure/"
                            "all_alt_scaffold_placement.txt",
                    help="URL to download the reference metadata from")
parser.add_argument("--regions", nargs="*",
                    default=["BRCA1", "BRCA2", "CENX", "MHC", "SMA", "LRC_KIR"],
                    help="region names to download reads for")
parser.add_argument("--sample_ftp_root",
                    default=("ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/"
                             "1000_genomes_project/data"),
                    help="FTP directory to scan for samples")
parser.add_argument("--population_pattern", default="*",
:param cores: Optional parameter to set the number of cores per node.
    If not provided, we use the number of cores on the node that launches
    the service.
:type sudo: boolean
:type memory: int or string convertible by bd2k.util.humanize.human2bytes to an int
:type disk: int or string convertible by bd2k.util.humanize.human2bytes to an int
:type cores: int
"""
self.sudo = sudo
if cores is None:
    cores = multiprocessing.cpu_count()
self.hostname = overrideLeaderIP
Job.Service.__init__(self, memory=memory, cores=cores, disk=disk)
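The constructor above calls Job.Service.__init__, which marks this class as a Toil service (a long-running process such as a Spark master) rather than an ordinary job. A service must also implement start, stop, and check; below is a minimal hedged sketch of that interface, with an illustrative subprocess standing in for the real service process.

import multiprocessing
import subprocess
from toil.job import Job

class ExampleService(Job.Service):
    """Minimal sketch of a Toil service that keeps a background process alive."""

    def __init__(self, memory="2G", cores=None, disk="2G"):
        if cores is None:
            cores = multiprocessing.cpu_count()
        Job.Service.__init__(self, memory=memory, cores=cores, disk=disk)
        self.process = None

    def start(self, job):
        # Launch the long-running process; the return value is handed to client jobs.
        self.process = subprocess.Popen(["sleep", "3600"])
        return "service-handle"

    def check(self):
        # Toil polls this periodically; return False (or raise) if the process died.
        return self.process is not None and self.process.poll() is None

    def stop(self, job):
        # Called once all client jobs of the service have finished.
        if self.process is not None:
            self.process.terminate()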
with Toil(toil_options) as t:
    if not t.options.restart:
        input_file_ids = argparse.Namespace()
        input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
        input_file_ids.chrom_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
        input_file_ids.hints_db = FileID.forPath(t.importFile('file://' + args.hints_db), args.hints_db)
        if args.cgp_param is not None:
            input_file_ids.cgp_param = FileID.forPath(t.importFile('file://' + args.cgp_param), args.cgp_param)
        else:
            input_file_ids.cgp_param = None
            input_file_ids.gtf = FileID.forPath(t.importFile('file://' + args.gtf), args.gtf)
        input_file_ids.cgp_cfg = FileID.forPath(t.importFile('file://' + args.cgp_cfg), args.cgp_cfg)
        input_file_ids.fasta = {genome: FileID.forPath(t.importFile('file://' + fasta), fasta)
                                for genome, fasta in args.fasta_files.iteritems()}
        du = tools.toilInterface.find_total_disk_usage([input_file_ids.hints_db], buffer='4G')
        job = Job.wrapJobFn(setup, args, input_file_ids, memory='8G', disk=du)
        results, stdout_file_ids, param_file_id = t.start(job)
    else:
        results, stdout_file_ids, param_file_id = t.restart()
    tools.fileOps.ensure_file_dir(args.stdout_file)
    with open(args.stdout_file, 'w') as outf, tools.fileOps.TemporaryFilePath() as tmp:
        for (chrom, start, chunksize), stdout_file in stdout_file_ids.iteritems():
            outf.write('## BEGIN CHUNK chrom: {} start: {} chunksize: {}\n'.format(chrom, start, chunksize))
            t.exportFile(stdout_file, 'file://' + tmp)
            for l in open(tmp):
                outf.write(l)
    for genome, (raw_gtf_file_id, joined_gtf_file_id, joined_gp_file_id) in results.iteritems():
        tools.fileOps.ensure_file_dir(args.augustus_cgp_raw_gtf[genome])
        t.exportFile(raw_gtf_file_id, 'file://' + args.augustus_cgp_raw_gtf[genome])
        t.exportFile(joined_gtf_file_id, 'file://' + args.augustus_cgp_gtf[genome])
        t.exportFile(joined_gp_file_id, 'file://' + args.augustus_cgp_gp[genome])
    if args.cgp_param is None:
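The pipeline above moves data in and out of the Toil job store with importFile/exportFile and wraps the returned IDs with FileID.forPath so file sizes are known when estimating disk requirements. Below is a stripped-down sketch of that round-trip pattern; the paths and the process job are illustrative, and the FileID import location varies by Toil release (toil.fileStore in older versions, toil.fileStores in newer ones).

from toil.common import Toil
from toil.fileStores import FileID   # older releases: from toil.fileStore import FileID
from toil.job import Job

def process(job, input_id):
    # Read the imported file from the job store and write a transformed copy back.
    path = job.fileStore.readGlobalFile(input_id)
    out_path = job.fileStore.getLocalTempFile()
    with open(path) as inf, open(out_path, 'w') as outf:
        outf.write(inf.read().upper())
    return job.fileStore.writeGlobalFile(out_path)

if __name__ == "__main__":
    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    with Toil(options) as t:
        if not options.restart:
            # importFile returns a file store ID; forPath attaches the on-disk size to it.
            input_id = FileID.forPath(t.importFile('file://' + '/tmp/input.txt'), '/tmp/input.txt')
            output_id = t.start(Job.wrapJobFn(process, input_id))
        else:
            output_id = t.restart()
        t.exportFile(output_id, 'file://' + '/tmp/output.txt')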