How to use the toil.job.Job.Runner class in toil

To help you get started, we’ve selected a few toil examples based on popular ways it is used in public projects.
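Every example below follows the same basic shape: attach Toil's command-line options to an argparse parser, parse the arguments, wrap a Python function as the root job, and hand both to Job.Runner. A minimal, self-contained sketch of that shape (the hello_world job function is our own illustration, not taken from any of the projects below):

from argparse import ArgumentParser

from toil.job import Job


def hello_world(job, name):
    # Runs on a worker; the job object exposes the fileStore, logging, resources, etc.
    job.fileStore.logToMaster('Hello, {}!'.format(name))


if __name__ == '__main__':
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)  # adds the job store argument plus Toil's flags
    options = parser.parse_args()
    root_job = Job.wrapJobFn(hello_world, 'world', cores=1, memory='1G', disk='1G')
    Job.Runner.startToil(root_job, options)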


github BD2KGenomics / toil-scripts / rna-seq-pipeline / rna-seq_pipeline.py (View on GitHub)
    bucket_name = s3_dir.split('/')[0]
    bucket_dir = '/'.join(s3_dir.split('/')[1:])
    # I/O
    uuid_tar = return_input_paths(job, work_dir, ids, 'uuid.tar.gz')
    # Upload to S3
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)
    k.key = os.path.join(bucket_dir, uuid + '.tar.gz')
    k.set_contents_from_filename(uuid_tar)


if __name__ == "__main__":
    # Define Parser object and add to toil
    parser = build_parser()
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()

    # Store input_URLs for downloading
    inputs = {'config': args.config,
              'unc.bed': args.unc,
              'hg19.transcripts.fa': args.fasta,
              'composite_exons.bed': args.composite_exons,
              'normalize.pl': args.normalize,
              'output_dir': args.output_dir,
              'rsem_ref.zip': args.rsem_ref,
              'chromosomes.zip': args.chromosomes,
              'ebwt.zip': args.ebwt,
              'ssec': args.ssec,
              's3_dir': args.s3_dir,
              'uuid': None,
              'samples.zip': None,
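The excerpt stops inside the inputs dictionary; the remainder of the script (not shown here) presumably follows the launch pattern sketched under the introduction: wrap the pipeline's first stage with Job.wrapJobFn, passing inputs, and run it with Job.Runner.startToil(root_job, args).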
github BD2KGenomics / toil-scripts / src / toil_scripts / defuse_pipeline / defuse_pipeline.py (View on GitHub)
                            help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                                 '\nDefault value: "%(default)s"')
    parser_run.add_argument('--fq', default=None, type=str,
                            help='URL for the sample BAM. URLs can take the form: http://, file://, s3://, '
                                 'and gnos://. The UUID for the sample must be given with the "--uuid" flag.')
    parser_run.add_argument('--uuid', default=None, type=str, help='Provide the UUID of a sample when using the '
                                                                   '"--bam" option')

    # If no arguments provided, print full help menu
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    writeToDebug('Debug log')

    Job.Runner.addToilOptions(parser)

    args = parser.parse_args()

    cwd = os.getcwd()
    if args.command == 'generate-config' or args.command == 'generate':
        generate_file(os.path.join(cwd, 'config-toil-defuse.yaml'), generate_config)
    if args.command == 'generate-manifest' or args.command == 'generate':
        generate_file(os.path.join(cwd, 'manifest-toil-defuse.tsv'), generate_manifest)
    if 'generate' in args.command:
        sys.exit()
    if args.command == 'run':
        # Read in config yaml file and set the default value to None
        config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()}
        check_for_required_parameters(config)

        # Program checks
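A note on the config read above: dict.iteritems() and the bare yaml.load() mark this as Python 2 code with an older PyYAML. A sketch of the equivalent under Python 3 and a current PyYAML (same args.config as above):

import yaml

with open(args.config) as f:
    # safe_load avoids executing arbitrary YAML tags; keys like "output-dir" become "output_dir"
    config = {k.replace('-', '_'): v for k, v in yaml.safe_load(f).items()}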
github BD2KGenomics / toil-scripts / src / toil_scripts / rnaseq_cgl / rnaseq_cgl_pipeline.py (View on GitHub)
                            '\nDefault value: "%(default)s"')
    group.add_argument('--manifest', default='manifest-toil-rnaseq.tsv', type=str,
                       help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                            '\nDefault value: "%(default)s"')
    group.add_argument('--samples', default=None, nargs='+', type=str,
                       help='Space delimited sample URLs (any number). Samples must be tarfiles/tarballs that contain '
                            'fastq files. URLs follow the format: http://foo.com/sample.tar, '
                            'file:///full/path/to/file.tar. The UUID for the sample will be derived from the file. '
                            'Samples passed in this way will be assumed to be paired-end; if using single-end data, '
                            'please use the manifest option.')
    # If no arguments provided, print full help menu
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    # Add Toil options
    Job.Runner.addToilOptions(parser_run)
    args = parser.parse_args()
    # Parse subparsers related to generation of config and manifest
    cwd = os.getcwd()
    if args.command == 'generate-config' or args.command == 'generate':
        generate_file(os.path.join(cwd, 'config-toil-rnaseq.yaml'), generate_config)
    if args.command == 'generate-manifest' or args.command == 'generate':
        generate_file(os.path.join(cwd, 'manifest-toil-rnaseq.tsv'), generate_manifest)
    # Pipeline execution
    elif args.command == 'run':
        require(os.path.exists(args.config), '{} not found. Please run '
                                             '"toil-rnaseq generate-config"'.format(args.config))
        if not args.samples:
            require(os.path.exists(args.manifest), '{} not found and no samples provided. Please '
                                                   'run "toil-rnaseq generate-manifest"'.format(args.manifest))
            samples = parse_samples(path_to_manifest=args.manifest)
        else:
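Note that this pipeline attaches the Toil options to the run subparser rather than to the top-level parser, so they only appear under toil-rnaseq run. A minimal sketch of that layout (the subcommand names follow the snippet; the rest is illustrative):

from argparse import ArgumentParser

from toil.job import Job

parser = ArgumentParser(description='toil-rnaseq sketch')
subparsers = parser.add_subparsers(dest='command')
subparsers.add_parser('generate-config')
subparsers.add_parser('generate-manifest')
parser_run = subparsers.add_parser('run')
Job.Runner.addToilOptions(parser_run)  # Toil flags live on the "run" subcommand only
args = parser.parse_args()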
github BD2KGenomics / hgvm-graph-bakeoff-evaluations / scripts / parallelAzureDownloader.py (View on GitHub)
"""
    
    if len(args) == 2 and args[1] == "--test":
        # Run the tests
        return doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
    
    options = parse_args(args) # This holds the nicely-parsed options object
    
    RealTimeLogger.start_master()
    
    # Make a root job
    root_job = Job.wrapJobFn(copy_everything, options,
        cores=1, memory="1G", disk="4G")
    
    # Run it and see how many jobs fail
    failed_jobs = Job.Runner.startToil(root_job, options)
    
    if failed_jobs > 0:
        raise Exception("{} jobs failed!".format(failed_jobs))
        
    print("All jobs completed successfully")
    
    RealTimeLogger.stop_master()
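A version note: when this script was written, Job.Runner.startToil returned the number of failed jobs, which is what the check above relies on; recent Toil releases instead raise a FailedJobsException on failure and return the root job's return value, so the count check would never fire there.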
github BD2KGenomics / toil-scripts / src / toil_scripts / transfer_tcga_to_s3 / transfer_tcga_to_s3.py (View on GitHub)
def main():
    """
    This is a Toil pipeline to transfer TCGA data into an S3 Bucket

    Data is pulled down with Genetorrent and transferred to S3 via S3AM.
    """
    # Define Parser object and add to toil
    parser = build_parser()
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()
    # Store inputs from argparse
    inputs = {'genetorrent': args.genetorrent,
              'genetorrent_key': args.genetorrent_key,
              'ssec': args.ssec,
              's3_dir': args.s3_dir}
    # Sanity checks
    if args.ssec:
        assert os.path.isfile(args.ssec)
    if args.genetorrent:
        assert os.path.isfile(args.genetorrent)
    if args.genetorrent_key:
        assert os.path.isfile(args.genetorrent_key)
    samples = parse_genetorrent(args.genetorrent)
    # Start pipeline
    # map_job accepts a function, an iterable, and *args. The function is launched as a child
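The comment above describes map_job from the toil_scripts support library, which fans a function out over an iterable. The real map_job also batches its inputs to limit the number of children per job, but the core idea can be sketched with plain Toil primitives:

def map_job_sketch(job, func, inputs, *args):
    # Launch func once per input as a child job; all children run after this
    # job finishes and may run in parallel with one another.
    for item in inputs:
        job.addChildJobFn(func, item, *args)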
github BD2KGenomics / hgvm-graph-bakeoff-evaluations / scripts / getAltReads.py (View on GitHub)
    options.log_host = socket.getfqdn()
    options.log_port = logging_server.server_address[1]
    
    RealTimeLogger.set_master(options)
    
    logger = RealTimeLogger.get()
    
    # Make the root job
    root_job = Job.wrapJobFn(downloadAllReads, options, 
        cores=1, memory="1G", disk="4G")
        
    print("Sending log from master")
    logger.info("This is the master")
    
    # Run Toil
    Job.Runner.startToil(root_job, options)
        
    logging_server.shutdown()
    server_thread.join()
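Job.Runner.startToil blocks until the entire workflow has finished, which is why the logging server can safely be shut down and its thread joined on the lines that follow it.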
github BD2KGenomics / toil-scripts / src / toil_scripts / bwa_alignment / old_alignment_script / batch_align.py (View on GitHub)
        f_out.write(generate_unique_key(key_path, url))
    # Commands to upload to S3 via S3AM
    s3am_command = ['s3am',
                    'upload',
                    '--sse-key-file', os.path.join(work_dir, uuid + '.key'),
                    'file://{}'.format(os.path.join(work_dir, uuid + '.bam')),
                    bucket_name,
                    os.path.join(bucket_dir, uuid + '.bam')]

    subprocess.check_call(s3am_command)


if __name__ == "__main__":
    # Define Parser object and add to toil
    parser = build_parser()
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()

    # Store input_URLs for downloading
    inputs = {'config': args.config,
              'ref.fa': args.ref,
              'ref.fa.amb': args.amb,
              'ref.fa.ann': args.ann,
              'ref.fa.bwt': args.bwt,
              'ref.fa.pac': args.pac,
              'ref.fa.sa': args.sa,
              'ref.fa.fai': args.fai,
              'ssec': args.ssec,
              'output_dir': args.out,
              's3_dir': args.s3_dir,
              'cpu_count': None}
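A side note on the upload above: subprocess.check_call raises CalledProcessError on a nonzero exit status from s3am, which fails the Toil job and lets Toil retry it according to the --retryCount option that addToilOptions wires in.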
github edraizen / molmimic / molmimic / generate_data / ibis_old / cluster_proteins.py (View on GitHub)
    #sfams = sdoms["sfam_id"].drop_duplicates().dropna().tolist()

    #map_job(job, setup_clustering, sfams, pdbFileStoreID, resoluFileStoreID)

    # #Add jobs for each sdi
    setup_clustering(job, "299845", pdbFileStoreID, resoluFileStoreID, pdbs)

    #del sdoms
    os.remove(sdoms_file)
    os.remove(resolu_file)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.clean = "always"
    options.targetTime = 1

    with Toil(options) as workflow:
        pdbs = [(os.path.basename(f), workflow.importFile('file://' + f)) for f in glob.glob("/root/ig/*/*.pdb")]
        workflow.start(Job.wrapJobFn(start_toil, pdbs))
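This last example uses the newer idiom: Job.Runner.getDefaultArgumentParser() returns a parser with Toil's options pre-attached, and the Toil context manager imports the input files into the job store before starting the root job. (The unused job = Job.wrapJobFn(start_toil) line in the original source has been dropped above; the root job is built inside workflow.start.) One thing to add when reusing this pattern: an interrupted workflow should be resumed with restart() rather than started again. A sketch, reusing start_toil and the pdbs list from the snippet:

    with Toil(options) as workflow:
        if options.restart:
            workflow.restart()  # resume an interrupted run from the existing job store
        else:
            pdbs = [(os.path.basename(f), workflow.importFile('file://' + f))
                    for f in glob.glob("/root/ig/*/*.pdb")]
            workflow.start(Job.wrapJobFn(start_toil, pdbs))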