How to use the toil.common.Toil class in toil

To help you get started, we’ve selected a few toil examples based on popular ways it is used in public projects.

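All of these examples follow the same basic shape: parse options with Toil's built-in argument parser, wrap a top-level job function, and run it inside a Toil context manager, either starting fresh or restarting from an existing job store. Here is a minimal sketch of that pattern (the hello job function is hypothetical, not taken from any of the projects below):

from toil.common import Toil
from toil.job import Job

def hello(job, name):
    # Runs on a worker; the return value becomes the job's result.
    return "Hello, %s!" % name

if __name__ == "__main__":
    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()

    with Toil(options) as toil:
        if not toil.options.restart:
            # Fresh run: hand the root job to the leader.
            print(toil.start(Job.wrapJobFn(hello, "world")))
        else:
            # Resume a previous run from the same job store.
            print(toil.restart())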

Example from edraizen/molmimic: generate_data/prepare_protein.py (view on GitHub)
    j2.addFollowOnJobFn(create_data_loader, dataset_name, cdd)
    j2.addFollowOnJobFn(convert_pdb_to_mmtf, dataset_name, cdd)


if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"
    dataset_name = options.jobStore.split(":")[-1]

    job = Job.wrapJobFn(start_toil, dataset_name)
    with Toil(options) as toil:
        toil.start(job)
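Note that options.clean = "always" tells Toil to delete the job store when the run finishes, whether it succeeded or failed, so a workflow configured this way cannot later be resumed with --restart.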
Example from edraizen/molmimic: molmimic/generate_data/main.py (view on GitHub)
action="store_true",
        default=False)
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    #options.clean = "always"
    options.targetTime = 1

    if options.cathcode is not None:
        options.cathcode = [c.split(".") for c in options.cathcode]

    sfam_file = os.path.abspath("cath.h5")
    if not os.path.isfile(sfam_file):
        store = IOStore.get("aws:us-east-1:molmimic-cath")
        store.read_input_file("cath-domain-description-file-small.h5", sfam_file)

    with Toil(options) as workflow:
        if not workflow.options.restart:
            cathFileStoreID = workflow.importFile("file://" + os.path.abspath(sfam_file))
            job = Job.wrapJobFn(start_toil, cathFileStoreID, cathcode=options.cathcode,
                update_features=options.features, force=options.force)
            workflow.start(job)
        else:
            workflow.restart()
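Because importFile() stages input files into the job store before the workflow starts, both the import and the construction of the root job are guarded by the restart check; a restarted run resumes from the existing job store rather than importing and starting again.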
Example from edraizen/molmimic: generate_data/get_structural_interactome.py (view on GitHub)
    return

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"
    dataset_name = options.jobStore.split(":")[-1]

    print "Running"

    job = Job.wrapJobFn(start_toil, dataset_name)
    with Toil(options) as toil:
        toil.start(job)
Example from DataBiosphere/toil: src/toil/utils/toilStats.py (view on GitHub)
def main():
    """ Reports stats on the workflow, use with --stats option to toil.
    """
    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
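This utility never launches jobs itself: Toil.resumeJobStore() reattaches to the job store left behind by a previous run so that its recorded statistics can be read back, collated, and reported.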
Example from edraizen/molmimic: molmimic/generate_data/prepare_protein.py (view on GitHub)
#    cores=max_cores)

    del sdoms
    os.remove(sdoms_file)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"

    job = Job.wrapJobFn(start_toil)
    with Toil(options) as toil:
        toil.start(job)
Example from ComparativeGenomicsToolkit/cactus: src/cactus/progressive/cactus_progressive.py (view on GitHub)
def runCactusProgressive(options):
    with Toil(options) as toil:
        importSingularityImage(options)
        #Run the workflow
        if options.restart:
            halID = toil.restart()
        else:
            options.cactusDir = getTempDirectory()
            #Create the progressive cactus project 
            projWrapper = ProjectWrapper(options)
            projWrapper.writeXml()

            pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                                  '%s_project.xml' % ProjectWrapper.alignmentDirName)
            assert os.path.exists(pjPath)

            project = MultiCactusProject()
Example from DataBiosphere/toil: src/toil/utils/toilKill.py (view on GitHub)
def main():
    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of the job store used by the workflow whose jobs should "
                             "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    config.jobStore = config.jobStore[5:] if config.jobStore.startswith('file:') else config.jobStore

    # ':' means an aws/google jobstore; use the old (broken?) method
    if ':' in config.jobStore:
        jobStore = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken src: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        batchSystem = Toil.createBatchSystem(jobStore.config)  # Should automatically kill existing jobs, so we're good.
        for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
            batchSystem.killBatchJobs(jobID)
        logger.info("All jobs SHOULD have been killed")
    # otherwise, kill the pid recorded in the jobstore
    else:
        pid_log = os.path.join(os.path.abspath(config.jobStore), 'pid.log')
        with open(pid_log, 'r') as f:
            pid2kill = f.read().strip()
        try:
            os.kill(int(pid2kill), signal.SIGKILL)
            logger.info("Toil process %s successfully terminated." % str(pid2kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid2kill))
Example from DataBiosphere/toil: attic/toil-sort-example.py (view on GitHub)
    parser.add_argument('--num-lines', default=1000, help='Number of lines in file to sort.', type=int)
    parser.add_argument('--line-length', default=50, help='Length of lines in file to sort.', type=int)
    parser.add_argument("--N",
                        help="The threshold below which a serial sort function is used to sort file. "
                        "All lines must of length less than or equal to N or program will fail",
                        default=10000)

    options = parser.parse_args()

    if int(options.N) <= 0:
        raise RuntimeError("Invalid value of N: %s" % options.N)

    file_name = 'file_to_sort.txt'
    make_file_to_sort(file_name=file_name, lines=options.num_lines, line_length=options.line_length)

    with Toil(options) as toil:
        sort_file_url = 'file://' + os.path.abspath('file_to_sort.txt')
        if not toil.options.restart:
            sort_file_id = toil.importFile(sort_file_url)
            sorted_file_id = toil.start(Job.wrapJobFn(setup, sort_file_id, int(options.N), False, memory='600M'))
        else:
            sorted_file_id = toil.restart()
        toil.exportFile(sorted_file_id, sort_file_url)
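This is the most complete round trip of the group: input enters the workflow through toil.importFile() from a file:// URL, and the sorted result is written back out with toil.exportFile() after either a fresh start or a restart.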
Example from ComparativeGenomicsToolkit/cactus: src/cactus/pipeline/cactus_workflow.py (view on GitHub)
def runCactusWorkflow(args):
    ##########################################
    #Construct the arguments.
    ##########################################
    
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)
        
    options = parser.parse_args(args)
    options.disableCaching = True
    setLoggingFromOptions(options)

    experimentWrapper = ExperimentWrapper(ET.parse(options.experimentFile).getroot())
    with Toil(options) as toil:
        seqIDMap = dict()
        for name in experimentWrapper.getGenomesWithSequence():
            fullSeq = getTempFile()
            seq = experimentWrapper.getSequenceID(name)
            if os.path.isdir(seq):
                catFiles([os.path.join(seq, seqFile) for seqFile in os.listdir(seq)], fullSeq)
            else:
                fullSeq = seq
            experimentWrapper.setSequenceID(name, toil.importFile(makeURL(fullSeq)))
            print(name, experimentWrapper.getSequenceID(name))

        experimentWrapper.writeXML(options.experimentFile)

        configNode = ET.parse(experimentWrapper.getConfigPath()).getroot()
        print(seqIDMap)
        cactusWorkflowArguments = CactusWorkflowArguments(options, experimentFile=options.experimentFile, configNode=configNode, seqIDMap=seqIDMap)
Example from DataBiosphere/toil: src/toil/batchSystems/abstractBatchSystem.py (view on GitHub)
def workerCleanup(info):
        """
        Cleans up the worker node on batch system shutdown. Also see :meth:`supportsWorkerCleanup`.

        :param WorkerCleanupInfo info: A named tuple consisting of all the relevant information
               for cleaning up the worker.
        """
        assert isinstance(info, WorkerCleanupInfo)
        workflowDir = Toil.getWorkflowDir(info.workflowID, info.workDir)
        DeferredFunctionManager.cleanupWorker(workflowDir)
        workflowDirContents = os.listdir(workflowDir)
        AbstractFileStore.shutdownFileStore(workflowDir, info.workflowID)
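        # 'and' binds tighter than 'or': an 'always' policy removes the
        # directory unconditionally, while 'onSuccess'/'onError' remove it
        # only once it holds nothing but (at most) the cache directory.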
        if (info.cleanWorkDir == 'always'
            or info.cleanWorkDir in ('onSuccess', 'onError')
            and workflowDirContents in ([], [cacheDirName(info.workflowID)])):
            shutil.rmtree(workflowDir, ignore_errors=True)