How to use the toil.job.Job.wrapJobFn function in toil

To help you get started, we’ve selected a few toil examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github edraizen / molmimic / molmimic / generate_data / calculate_features.py View on Github external
os.remove(pdb_file)

    #job.addChildJobFn(calculate_features, "301320/yc/1YCS_A_sdi225433_d0.pdb")

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    # Build the stock Toil option parser, then override a few settings
    # that make local debugging runs easier to follow.
    options = Job.Runner.getDefaultArgumentParser().parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"
    options.targetTime = 1

    # Wrap the workflow entry point and launch it inside a Toil context.
    root_job = Job.wrapJobFn(start_toil)
    with Toil(options) as workflow:
        workflow.start(root_job)
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / align_transcripts.py View on Github external
with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.ref_genome_fasta = tools.toilInterface.write_fasta_to_filestore(t, args.ref_genome_fasta)
            input_file_ids.genome_fasta = tools.toilInterface.write_fasta_to_filestore(t, args.genome_fasta)
            input_file_ids.annotation_gp = FileID.forPath(t.importFile('file://' + args.annotation_gp),
                                                          args.annotation_gp)
            input_file_ids.ref_db = FileID.forPath(t.importFile('file://' + args.ref_db_path), args.ref_db_path)
            input_file_ids.modes = {}
            file_ids = [input_file_ids.ref_genome_fasta, input_file_ids.genome_fasta, input_file_ids.annotation_gp,
                        input_file_ids.ref_db]
            for mode in args.transcript_modes:
                input_file_ids.modes[mode] = t.importFile('file://' + args.transcript_modes[mode]['gp'])
                file_ids.append(input_file_ids.modes[mode])
            disk_usage = tools.toilInterface.find_total_disk_usage(file_ids)
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='16G', disk=disk_usage)
            results_file_ids = t.start(job)
        else:
            results_file_ids = t.restart()
        for file_path, file_id in results_file_ids.iteritems():
            tools.fileOps.ensure_file_dir(file_path)
            t.exportFile(file_id, 'file://' + file_path)
github BD2KGenomics / hgvm-graph-bakeoff-evaluations / scripts / variantCallingExperiment.py View on Github external
def main(args):
    """Parse command-line arguments, run the evaluation workflow under
    Toil, and print the root job's return value."""
    options = parse_args(args)

    # The real-time logger must be running before any Toil jobs start.
    RealTimeLogger.start_master()

    # Single root job with fixed resource requirements for this driver.
    root = Job.wrapJobFn(run_and_evaluate, options,
                         cores=1, memory="2G", disk="2G")

    # Execute the workflow and capture whatever the root job returns.
    result = Job.Runner.startToil(root,  options)

    RealTimeLogger.stop_master()

    print("Root return value:")
    print(result)
github BD2KGenomics / toil-scripts / src / toil_scripts / gatk_germline / germline.py View on Github external
'got %s.' % (hard_filter_field, inputs[hard_filter_field]))

        # Set resource parameters
        inputs['xmx'] = human2bytes(inputs['xmx'])
        inputs['file_size'] = human2bytes(inputs['file_size'])
        inputs['cores'] = int(inputs['cores'])

        inputs['annotations'] = set(inputs['snp_filter_annotations'] + inputs['indel_filter_annotations'])

        # HaplotypeCaller test data for testing
        inputs['hc_output'] = inputs.get('hc_output', None)

        # It is a toil-scripts convention to store input parameters in a Namespace object
        config = argparse.Namespace(**inputs)

        root = Job.wrapJobFn(run_gatk_germline_pipeline, samples, config)
        Job.Runner.startToil(root, options)
github edraizen / molmimic / molmimic / generate_data / build_full_dataset.py View on Github external
def start_toil(dataset_name, options, use_data=False):
    """Build the full dataset-generation job graph and launch it under Toil.

    Parameters:
        dataset_name: dataset identifier forwarded to each pipeline stage.
        options: Toil options namespace used to construct the workflow.
        use_data: if True, prepend a data-download job before the
            MMDB->PDB conversion step and start the workflow from it.
    """
    if use_data:
        data = Job.wrapJobFn(download_data.start_toil).encapsulate()
        mmdb2pdb = data.addFollowOnJobFn(convert_mmdb_to_pdb.start_toil).encapsulate()
    else:
        mmdb2pdb = Job.wrapJobFn(convert_mmdb_to_pdb.start_toil).encapsulate()

    # Interactome generation runs after the PDB conversion; BSA calculation
    # follows the interactome job.
    interactome = mmdb2pdb.addChildJobFn(get_structural_interactome.start_toil, dataset_name).encapsulate()
    bsa = interactome.addFollowOnJobFn(calculate_bsa.start_toil, dataset_name).encapsulate()

    prep_protein = mmdb2pdb.addChildJobFn(prepare_protein.start_toil, dataset_name).encapsulate()
    features = mmdb2pdb.addFollowOnJobFn(calculate_features.start_toil, dataset_name, name="features").encapsulate()

    # Renamed from 'filter' so the builtin filter() is not shadowed.
    filter_job = mmdb2pdb.addFollowOnJobFn(filter_dataset.start_toil, dataset_name, name="filter").encapsulate()

    # Start from the download job when requested, otherwise from the converter.
    with Toil(options) as workflow:
        workflow.start(mmdb2pdb if not use_data else data)
github edraizen / molmimic / molmimic / generate_data / get_structural_interactome.py View on Github external
#Cleanup
    job.addFollowOnJobFn(cleanup)
    os.remove(ibis_obs_path)
    os.remove(pdb_path)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    # Default Toil argument parser with debug-friendly overrides applied.
    arg_parser = Job.Runner.getDefaultArgumentParser()
    opts = arg_parser.parse_args()
    opts.logLevel = "DEBUG"
    opts.clean = "always"
    opts.targetTime = 1

    # Hand the wrapped entry point to Toil and run the workflow.
    entry_job = Job.wrapJobFn(start_toil)
    with Toil(opts) as toil_workflow:
        toil_workflow.start(entry_job)
github BD2KGenomics / toil-scripts / src / toil_scripts / gatk_germline / germline.py View on Github external
config.run_oncotator        If True, then adds Oncotator to pipeline
        Additional parameters are needed for downstream steps. Refer to pipeline README for more information.
    """
    # Determine the available disk space on a worker node before any jobs have been run.
    work_dir = job.fileStore.getLocalTempDir()
    st = os.statvfs(work_dir)
    config.available_disk = st.f_bavail * st.f_frsize

    # Check that there is a reasonable number of samples for joint genotyping
    num_samples = len(samples)
    if config.joint_genotype and not 30 < num_samples < 200:
        job.fileStore.logToMaster('WARNING: GATK recommends batches of '
                                  '30 to 200 samples for joint genotyping. '
                                  'The current cohort has %d samples.' % num_samples)

    shared_files = Job.wrapJobFn(download_shared_files, config).encapsulate()
    job.addChild(shared_files)

    if config.preprocess_only:
        for sample in samples:
            shared_files.addChildJobFn(prepare_bam,
                                       sample.uuid,
                                       sample.url,
                                       shared_files.rv(),
                                       paired_url=sample.paired_url,
                                       rg_line=sample.rg_line)
    else:
        run_pipeline = Job.wrapJobFn(gatk_germline_pipeline,
                                     samples,
                                     shared_files.rv()).encapsulate()
        shared_files.addChild(run_pipeline)
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / CAT / generate_hints_db.py View on Github external
continue
                for bam_path in cfg[dtype][genome]:
                    validate_bam_fasta_pairs(bam_path, fasta_sequences, genome)
                    is_paired = bam_is_paired(bam_path)
                    bam_file_ids[dtype][os.path.basename(bam_path)] = (toil.importFile('file://' + bam_path),
                                                                       toil.importFile('file://' + bam_path + '.bai'),
                                                                       is_paired)
                    is_paired_str = 'paired' if is_paired else 'not paired'
                    logger.info('BAM {} is valid and was inferred to be {}.'.format(os.path.basename(bam_path),
                                                                                    is_paired_str))
            input_file_ids = {'bams': bam_file_ids,
                              'annotation': toil.importFile('file://' + annotation) if annotation is not None else None}
            logger.info('{} has {} valid intron-only BAMs and {} valid BAMs. '
                        'Beginning Toil hints pipeline.'.format(genome, len(bam_file_ids['INTRONBAM']),
                                                                len(bam_file_ids['BAM'])))
            job = Job.wrapJobFn(setup_hints, input_file_ids)
            combined_hints = toil.start(job)
        else:
            logger.info('Restarting Toil hints pipeline for {}.'.format(genome))
            combined_hints = toil.restart()
        tools.fileOps.ensure_file_dir(out_gff_path)
        toil.exportFile(combined_hints, 'file://' + out_gff_path)
github BD2KGenomics / hgvm-graph-bakeoff-evaluations / scripts / collateStatistics.py View on Github external
"""
    Parses command line arguments and do the work of the program.
    "args" specifies the program arguments, with args[0] being the executable
    name. The return value should be used as the program's exit code.
    """
    
    if len(args) == 2 and args[1] == "--test":
        # Run the tests
        return doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
    
    options = parse_args(args) # This holds the nicely-parsed options object
    
    RealTimeLogger.start_master()
    
    # Make a root job
    root_job = Job.wrapJobFn(collate_all, options,
        cores=1, memory="1G", disk="1G")
    
    # Run it and see how many jobs fail
    failed_jobs = Job.Runner.startToil(root_job,  options)
    
    if failed_jobs > 0:
        raise Exception("{} jobs failed!".format(failed_jobs))
        
    print("All jobs completed successfully")
    
    RealTimeLogger.stop_master()