How to use the toil.job.Job class in toil

To help you get started, we've selected a few examples based on popular ways toil.job.Job is used in public projects.

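All of the excerpts below share the same core pattern: wrap a job function with Job.wrapJobFn (or subclass Job) and launch it through toil's runner machinery. As a quick orientation, here is a minimal, self-contained sketch of that pattern; the hello function and its message are illustrative, not taken from any of the projects below.

from toil.common import Toil
from toil.job import Job

def hello(job, name):
    # Every toil job function receives the Job object as its first argument.
    return "Hello, %s" % name

if __name__ == "__main__":
    # getDefaultArgumentParser supplies toil's standard options, with the
    # job store locator as the first positional argument.
    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    with Toil(options) as toil:
        print(toil.start(Job.wrapJobFn(hello, "world")))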

github edraizen / molmimic / generate_data / calculate_bsa.py
import os

def start_toil(job, dataset_name, name="bsa"):
    # get_interfaces_path, iter_cdd, observed_bsa and inferred_bsa are
    # helpers defined elsewhere in molmimic.
    path = os.path.join(get_interfaces_path(dataset_name), "by_superfamily")
    for cdd, sfam_id in iter_cdd(use_id=True, group_superfam=True):
        sfam_path = os.path.join(path, str(int(sfam_id)), str(int(sfam_id)))
        if not os.path.isfile(sfam_path + ".observed_interactome"):
            continue
        cjob = job.addChildJobFn(observed_bsa, dataset_name, sfam_id)
        if not os.path.isfile(sfam_path + ".inferred_interactome"):
            continue
        cjob.addFollowOnJobFn(inferred_bsa, dataset_name, sfam_id)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"
    dataset_name = options.jobStore.split(":")[-1]

    job = Job.wrapJobFn(start_toil, dataset_name)
    with Toil(options) as toil:
        toil.start(job)
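
A note on ordering in the excerpt above: a child added with addChildJobFn runs after its parent's function returns, while a follow-on added with addFollowOnJobFn runs only after the job it is attached to and all of that job's children have finished. A minimal sketch of the same wiring, with illustrative step functions:

from toil.job import Job

def step_one(job):
    return "one"

def step_two(job):
    return "two"

def parent(job):
    child = job.addChildJobFn(step_one)   # runs after parent returns
    child.addFollowOnJobFn(step_two)      # runs after step_one, and any children it adds, finish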

github BD2KGenomics / toil-scripts / src / toil_scripts / adam_pipeline / spark_toil_script.py
def main(args):
    # build_parser and start_master are defined elsewhere in this script.
    parser = build_parser()
    Job.Runner.addToilOptions(parser)
    options = parser.parse_args()

    inputs = {'numWorkers': options.num_nodes - 1,
              'outDir':     options.output_directory,
              'bamName':    options.input_file_name,
              'knownSNPs':  options.known_SNPs,
              'driverMemory': options.driver_memory,
              'executorMemory': options.executor_memory,
              'sudo': options.sudo,
              'suffix': None}

    Job.Runner.startToil(Job.wrapJobFn(start_master, inputs), options)
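
Job.Runner.startToil, used here, is the older launch API; current toil releases deprecate it in favor of the Toil context manager. Assuming the same start_master job function and inputs dict, the equivalent launch would look roughly like:

from toil.common import Toil

with Toil(options) as toil:
    toil.start(Job.wrapJobFn(start_master, inputs))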

github DataBiosphere / toil / examples / hello.py
def __init__(self, message):
    Job.__init__(self, memory="1G", cores=2, disk="2G")
    self.message = message
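
This __init__ belongs to the HelloWorld class in toil's hello.py example. For context, a complete minimal version looks roughly like the following; the run body and launch message are reconstructed from toil's documented hello-world pattern rather than copied from the file.

from toil.common import Toil
from toil.job import Job

class HelloWorld(Job):
    def __init__(self, message):
        Job.__init__(self, memory="1G", cores=2, disk="2G")
        self.message = message

    def run(self, fileStore):
        # run is the job's unit of work; its return value becomes the
        # workflow's return value below.
        return "Hello, world! Here's a message: %s" % self.message

if __name__ == "__main__":
    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    with Toil(options) as toil:
        print(toil.start(HelloWorld("woot")))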

github ComparativeGenomicsToolkit / cactus / src / cactus / shared / common.py
def runCactusProgressive(seqFile,
                         configFile,
                         toilDir,
                         logLevel=None, retryCount=0,
                         batchSystem="single_machine",
                         rescueJobFrequency=None,
                         skipAlignments=False,
                         buildHal=True,
                         buildAvgs=False,
                         toilStats=False,
                         maxCpus=None):
    opts = Job.Runner.getDefaultOptions(toilDir)
    opts.batchSystem = batchSystem if batchSystem is not None else opts.batchSystem
    opts.logLevel = logLevel if logLevel is not None else opts.logLevel
    opts.maxCores = maxCpus if maxCpus is not None else opts.maxCores
    # Used for tests
    opts.scale = 0.1
    opts.retryCount = retryCount if retryCount is not None else opts.retryCount
    # This *shouldn't* be necessary, but it looks like the toil
    # deadlock-detection still has issues.
    opts.deadlockWait = 3600

    opts.buildHal = buildHal
    opts.buildAvgs = buildAvgs
    opts.buildFasta = True
    if toilStats:
        opts.stats = True
    opts.seqFile = seqFile
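
Unlike the argparse-driven examples, this one builds its configuration programmatically: Job.Runner.getDefaultOptions(jobStore) returns an options namespace with toil's defaults, which the caller then mutates. Stripped to its skeleton, that launch style looks like this; my_workflow and the job store path are illustrative.

from toil.common import Toil
from toil.job import Job

def my_workflow(job):
    return "done"

opts = Job.Runner.getDefaultOptions("./myJobStore")  # job store locator
opts.logLevel = "INFO"
opts.retryCount = 0
with Toil(opts) as toil:
    toil.start(Job.wrapJobFn(my_workflow))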

github BD2KGenomics / hgvm-graph-bakeoff-evaluations / scripts / getAltReads.py
Borrows heavily from the argparse documentation examples:
    
    """
    
    # Construct the parser (which is stored in parser)
    # Module docstring lives in __doc__
    # See http://python-forum.com/pythonforum/viewtopic.php?f=3&t=36847
    # And a formatter class so our examples in the docstring look good. Isn't it
    # convenient how we already wrapped it to 80 characters?
    # See http://docs.python.org/library/argparse.html#formatter-class
    parser = argparse.ArgumentParser(description=__doc__, 
        formatter_class=argparse.RawDescriptionHelpFormatter)
    
    # Add the Toil options so the job store is the first argument
    Job.Runner.addToilOptions(parser)

    # General options
    parser.add_argument("--reference_metadata", 
        default="ftp://ftp.ncbi.nlm.nih.gov/genomes/all/"
        "GCA_000001405.17_GRCh38.p2/"
        "GCA_000001405.17_GRCh38.p2_assembly_structure/"
        "all_alt_scaffold_placement.txt",
        help="URL to download the reference metadata from")
    parser.add_argument("--regions", nargs="*", 
        default=["BRCA1", "BRCA2", "CENX", "MHC", "SMA", "LRC_KIR"],
        help="region names to download reads for")
    parser.add_argument("--sample_ftp_root",
        default=("ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/"
        "1000_genomes_project/data"), 
        help="FTP directory to scan for samples")
    parser.add_argument("--population_pattern", default="*", 

github DataBiosphere / toil / src / toil / lib / spark.py
        :param cores: Optional parameter to set the number of cores per node.
            If not provided, we use the number of cores on the node that
            launches the service.
        :type sudo: boolean
        :type memory: int or string convertible by bd2k.util.humanize.human2bytes to an int
        :type disk: int or string convertible by bd2k.util.humanize.human2bytes to an int
        :type cores: int
        """
        self.sudo = sudo

        if cores is None:
            cores = multiprocessing.cpu_count()

        # overrideLeaderIP is a parameter of the enclosing __init__, elided
        # from this excerpt.
        self.hostname = overrideLeaderIP

        Job.Service.__init__(self, memory=memory, cores=cores, disk=disk)
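
Job.Service, subclassed here, is toil's abstraction for long-running sidecar processes such as this Spark leader: the service is started before the jobs that need it and stopped after they finish. A subclass implements three methods; the payload in this bare-bones sketch is hypothetical.

from toil.job import Job

class DemoService(Job.Service):
    def __init__(self):
        Job.Service.__init__(self, memory="1G", cores=1, disk="1G")

    def start(self, job):
        # Launch the long-running process; the return value is handed to
        # the jobs that use the service.
        return "host:port"

    def check(self):
        # Return True while the process is healthy; returning False or
        # raising tells toil the service has died.
        return True

    def stop(self, job):
        # Tear the process down once dependent jobs have finished.
        pass

A parent job attaches the service with job.addService(DemoService()), which returns a promise of whatever start returned.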

github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / augustus_cgp.py
# Note: the original file is Python 2 (dict.iteritems()); shown here with
# .items() for Python 3, which current toil requires.
with Toil(toil_options) as t:
    if not t.options.restart:
        input_file_ids = argparse.Namespace()
        input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
        input_file_ids.chrom_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
        input_file_ids.hints_db = FileID.forPath(t.importFile('file://' + args.hints_db), args.hints_db)
        if args.cgp_param is not None:
            input_file_ids.cgp_param = FileID.forPath(t.importFile('file://' + args.cgp_param), args.cgp_param)
        else:
            input_file_ids.cgp_param = None
            input_file_ids.gtf = FileID.forPath(t.importFile('file://' + args.gtf), args.gtf)
        input_file_ids.cgp_cfg = FileID.forPath(t.importFile('file://' + args.cgp_cfg), args.cgp_cfg)
        input_file_ids.fasta = {genome: FileID.forPath(t.importFile('file://' + fasta), fasta)
                                for genome, fasta in args.fasta_files.items()}
        du = tools.toilInterface.find_total_disk_usage([input_file_ids.hints_db], buffer='4G')
        job = Job.wrapJobFn(setup, args, input_file_ids, memory='8G', disk=du)
        results, stdout_file_ids, param_file_id = t.start(job)
    else:
        results, stdout_file_ids, param_file_id = t.restart()
    tools.fileOps.ensure_file_dir(args.stdout_file)
    with open(args.stdout_file, 'w') as outf, tools.fileOps.TemporaryFilePath() as tmp:
        for (chrom, start, chunksize), stdout_file in stdout_file_ids.items():
            outf.write('## BEGIN CHUNK chrom: {} start: {} chunksize: {}\n'.format(chrom, start, chunksize))
            t.exportFile(stdout_file, 'file://' + tmp)
            for line in open(tmp):
                outf.write(line)
    for genome, (raw_gtf_file_id, joined_gtf_file_id, joined_gp_file_id) in results.items():
        tools.fileOps.ensure_file_dir(args.augustus_cgp_raw_gtf[genome])
        t.exportFile(raw_gtf_file_id, 'file://' + args.augustus_cgp_raw_gtf[genome])
        t.exportFile(joined_gtf_file_id, 'file://' + args.augustus_cgp_gtf[genome])
        t.exportFile(joined_gp_file_id, 'file://' + args.augustus_cgp_gp[genome])
    if args.cgp_param is None:
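
The shape of this last example, importFile before start, restart on resume, exportFile at the end, is toil's standard pattern for moving files across the job-store boundary. Reduced to a skeleton, with the paths and workflow function being illustrative:

from toil.common import Toil
from toil.job import Job

def workflow(job, input_id):
    # Jobs read imported files from the file store by their FileID.
    local_path = job.fileStore.readGlobalFile(input_id)
    # ... do work on local_path, then hand a result file back ...
    return job.fileStore.writeGlobalFile(local_path)

options = Job.Runner.getDefaultOptions("./exampleStore")
with Toil(options) as toil:
    if not toil.options.restart:
        input_id = toil.importFile("file:///abs/path/input.txt")
        output_id = toil.start(Job.wrapJobFn(workflow, input_id))
    else:
        output_id = toil.restart()
    toil.exportFile(output_id, "file:///abs/path/output.txt")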