os.remove(pdb_file)
#job.addChildJobFn(calculate_features, "301320/yc/1YCS_A_sdi225433_d0.pdb")
if __name__ == "__main__":
from toil.common import Toil
from toil.job import Job
parser = Job.Runner.getDefaultArgumentParser()
options = parser.parse_args()
options.logLevel = "DEBUG"
options.clean = "always"
options.targetTime = 1
job = Job.wrapJobFn(start_toil)
with Toil(options) as toil:
toil.start(job)
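# For context, a minimal self-contained sketch of the same launch pattern,
# assuming only the Toil API; the `hello` job function below is illustrative,
# not part of the original workflow.
from toil.common import Toil
from toil.job import Job

def hello(job, name):
    # Job functions receive the Job as their first argument; the return
    # value of the root job becomes the workflow's return value.
    job.fileStore.logToMaster('Hello, %s!' % name)
    return 'Hello, %s!' % name

if __name__ == "__main__":
    # The default parser adds the required job-store argument plus
    # options such as --restart and --logLevel.
    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    root = Job.wrapJobFn(hello, 'world')
    with Toil(options) as workflow:
        print(workflow.start(root))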
with Toil(toil_options) as t:
    if not t.options.restart:
        input_file_ids = argparse.Namespace()
        input_file_ids.ref_genome_fasta = tools.toilInterface.write_fasta_to_filestore(t, args.ref_genome_fasta)
        input_file_ids.genome_fasta = tools.toilInterface.write_fasta_to_filestore(t, args.genome_fasta)
        input_file_ids.annotation_gp = FileID.forPath(t.importFile('file://' + args.annotation_gp),
                                                      args.annotation_gp)
        input_file_ids.ref_db = FileID.forPath(t.importFile('file://' + args.ref_db_path), args.ref_db_path)
        input_file_ids.modes = {}
        file_ids = [input_file_ids.ref_genome_fasta, input_file_ids.genome_fasta, input_file_ids.annotation_gp,
                    input_file_ids.ref_db]
        for mode in args.transcript_modes:
            input_file_ids.modes[mode] = t.importFile('file://' + args.transcript_modes[mode]['gp'])
            file_ids.append(input_file_ids.modes[mode])
        disk_usage = tools.toilInterface.find_total_disk_usage(file_ids)
        job = Job.wrapJobFn(setup, args, input_file_ids, memory='16G', disk=disk_usage)
        results_file_ids = t.start(job)
    else:
        results_file_ids = t.restart()
    for file_path, file_id in results_file_ids.items():  # iteritems() is Python 2 only
        tools.fileOps.ensure_file_dir(file_path)
        t.exportFile(file_id, 'file://' + file_path)
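# `write_fasta_to_filestore` and `find_total_disk_usage` above are
# project-specific helpers, not part of Toil. A plausible minimal stand-in
# for the latter, assuming every entry is a Toil FileID (FileID.forPath
# records each file's on-disk size at import time):
from toil.fileStores import FileID  # toil.fileStore in older releases

def estimate_disk_usage(file_ids):
    # Hypothetical helper: sum the recorded sizes so the total can be
    # passed as the `disk` requirement of a downstream job.
    return sum(f.size for f in file_ids if isinstance(f, FileID))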
def main(args):
    options = parse_args(args)

    RealTimeLogger.start_master()

    # Make a root job
    root_job = Job.wrapJobFn(run_and_evaluate, options,
                             cores=1, memory="2G", disk="2G")

    # Run it and get the return value
    answer = Job.Runner.startToil(root_job, options)

    RealTimeLogger.stop_master()

    print("Root return value:")
    print(answer)
'got %s.' % (hard_filter_field, inputs[hard_filter_field]))
# Set resource parameters
inputs['xmx'] = human2bytes(inputs['xmx'])
inputs['file_size'] = human2bytes(inputs['file_size'])
inputs['cores'] = int(inputs['cores'])

inputs['annotations'] = set(inputs['snp_filter_annotations'] + inputs['indel_filter_annotations'])

# Optional pre-generated HaplotypeCaller output, used for testing
inputs['hc_output'] = inputs.get('hc_output', None)

# It is a toil-scripts convention to store input parameters in a Namespace object
config = argparse.Namespace(**inputs)

root = Job.wrapJobFn(run_gatk_germline_pipeline, samples, config)
Job.Runner.startToil(root, options)
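# The Namespace convention above simply converts a dict of validated inputs
# into attribute access. A tiny illustration with made-up values:
import argparse

inputs = {'xmx': 8 * 1024 ** 3, 'cores': 4, 'run_oncotator': False}
config = argparse.Namespace(**inputs)
assert config.cores == 4  # dict keys become attributes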
def start_toil(dataset_name, options, use_data=False):
    if use_data:
        data = Job.wrapJobFn(download_data.start_toil).encapsulate()
        mmdb2pdb = data.addFollowOnJobFn(convert_mmdb_to_pdb.start_toil).encapsulate()
    else:
        mmdb2pdb = Job.wrapJobFn(convert_mmdb_to_pdb.start_toil).encapsulate()

    interactome = mmdb2pdb.addChildJobFn(get_structural_interactome.start_toil, dataset_name).encapsulate()
    bsa = interactome.addFollowOnJobFn(calculate_bsa.start_toil, dataset_name).encapsulate()

    prep_protein = mmdb2pdb.addChildJobFn(prepare_protein.start_toil, dataset_name).encapsulate()

    features = mmdb2pdb.addFollowOnJobFn(calculate_features.start_toil, dataset_name, name="features").encapsulate()
    filter = mmdb2pdb.addFollowOnJobFn(filter_dataset.start_toil, dataset_name, name="filter").encapsulate()

    with Toil(options) as toil:
        toil.start(mmdb2pdb if not use_data else data)

# Cleanup
job.addFollowOnJobFn(cleanup)
os.remove(ibis_obs_path)
os.remove(pdb_path)
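# The DAG above mixes Toil's two dependency kinds: a child runs after its
# parent finishes, while a follow-on runs only after the parent *and all of
# the parent's children* finish. A minimal sketch (the `step` function is
# illustrative):
from toil.job import Job

def step(job, label):
    job.fileStore.logToMaster(label)

root = Job.wrapJobFn(step, 'runs first')
root.addChildJobFn(step, 'runs second (children may run in parallel)')
root.addChildJobFn(step, 'also runs second')
root.addFollowOnJobFn(step, 'runs last, after root and both children')
# Pass `root` to Toil(...).start(...) as in the snippets above.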
if __name__ == "__main__":
from toil.common import Toil
from toil.job import Job
parser = Job.Runner.getDefaultArgumentParser()
options = parser.parse_args()
options.logLevel = "DEBUG"
options.clean = "always"
options.targetTime = 1
job = Job.wrapJobFn(start_toil)
with Toil(options) as toil:
toil.start(job)
config.run_oncotator        If True, Oncotator is added to the pipeline

Additional parameters are needed for downstream steps. Refer to the pipeline README for more information.
"""
# Determine the available disk space on a worker node before any jobs have been run.
work_dir = job.fileStore.getLocalTempDir()
st = os.statvfs(work_dir)
config.available_disk = st.f_bavail * st.f_frsize

# Check that there is a reasonable number of samples for joint genotyping
num_samples = len(samples)
if config.joint_genotype and not 30 < num_samples < 200:
    job.fileStore.logToMaster('WARNING: GATK recommends batches of '
                              '30 to 200 samples for joint genotyping. '
                              'The current cohort has %d samples.' % num_samples)

shared_files = Job.wrapJobFn(download_shared_files, config).encapsulate()
job.addChild(shared_files)

if config.preprocess_only:
    for sample in samples:
        shared_files.addChildJobFn(prepare_bam,
                                   sample.uuid,
                                   sample.url,
                                   shared_files.rv(),
                                   paired_url=sample.paired_url,
                                   rg_line=sample.rg_line)
else:
    run_pipeline = Job.wrapJobFn(gatk_germline_pipeline,
                                 samples,
                                 shared_files.rv()).encapsulate()
    shared_files.addChild(run_pipeline)
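# `shared_files.rv()` above is a promise: a placeholder for the return value
# of `download_shared_files` that Toil resolves to the concrete value before
# the receiving job runs. A minimal sketch of the mechanism (both job
# functions are illustrative):
from toil.job import Job

def produce(job):
    return 42

def consume(job, value):
    # By the time this executes, `value` is the resolved 42, not a promise.
    job.fileStore.logToMaster('got %d' % value)

producer = Job.wrapJobFn(produce)
producer.addChildJobFn(consume, producer.rv())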
            continue
        for bam_path in cfg[dtype][genome]:
            validate_bam_fasta_pairs(bam_path, fasta_sequences, genome)
            is_paired = bam_is_paired(bam_path)
            bam_file_ids[dtype][os.path.basename(bam_path)] = (toil.importFile('file://' + bam_path),
                                                               toil.importFile('file://' + bam_path + '.bai'),
                                                               is_paired)
            is_paired_str = 'paired' if is_paired else 'not paired'
            logger.info('BAM {} is valid and was inferred to be {}.'.format(os.path.basename(bam_path),
                                                                            is_paired_str))
    input_file_ids = {'bams': bam_file_ids,
                      'annotation': toil.importFile('file://' + annotation) if annotation is not None else None}
    logger.info('{} has {} valid intron-only BAMs and {} valid BAMs. '
                'Beginning Toil hints pipeline.'.format(genome, len(bam_file_ids['INTRONBAM']),
                                                        len(bam_file_ids['BAM'])))
    job = Job.wrapJobFn(setup_hints, input_file_ids)
    combined_hints = toil.start(job)
else:
    logger.info('Restarting Toil hints pipeline for {}.'.format(genome))
    combined_hints = toil.restart()
tools.fileOps.ensure_file_dir(out_gff_path)
toil.exportFile(combined_hints, 'file://' + out_gff_path)
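# The restart-aware snippets above share one idiom, which matters because
# starting a workflow on an existing job store (or restarting one that does
# not exist) raises an error. Condensed, reusing the names from the snippet
# above and assuming `toil_options` came from Toil's default parser:
with Toil(toil_options) as t:
    combined_hints = t.restart() if toil_options.restart else t.start(Job.wrapJobFn(setup_hints, input_file_ids))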
"""
Parses command line arguments and do the work of the program.
"args" specifies the program arguments, with args[0] being the executable
name. The return value should be used as the program's exit code.
"""
if len(args) == 2 and args[1] == "--test":
# Run the tests
return doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
options = parse_args(args) # This holds the nicely-parsed options object
RealTimeLogger.start_master()
# Make a root job
root_job = Job.wrapJobFn(collate_all, options,
cores=1, memory="1G", disk="1G")
# Run it and see how many jobs fail
failed_jobs = Job.Runner.startToil(root_job, options)
if failed_jobs > 0:
raise Exception("{} jobs failed!".format(failed_jobs))
print("All jobs completed successfully")
RealTimeLogger.stop_master()
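# Per the docstring, main() returns the program's exit code, so the matching
# entry point would be:
if __name__ == "__main__":
    import sys
    sys.exit(main(sys.argv))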