    j2.addFollowOnJobFn(create_data_loader, dataset_name, cdd)
    j2.addFollowOnJobFn(convert_pdb_to_mmtf, dataset_name, cdd)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"

    dataset_name = options.jobStore.split(":")[-1]

    job = Job.wrapJobFn(start_toil, dataset_name)
    with Toil(options) as toil:
        toil.start(job)
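# Several snippets in this section wrap a `start_toil` root job that is never shown.
# A minimal, purely illustrative sketch of such a job function follows; the body and the
# child/follow-on job names (`prepare_dataset`, `build_features`) are assumptions, not
# code from the original project.
def start_toil(job, dataset_name):
    # A Toil job function receives the live Job object as its first argument.
    child = job.addChildJobFn(prepare_dataset, dataset_name)    # hypothetical child job
    child.addFollowOnJobFn(build_features, dataset_name)        # hypothetical follow-on job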
action="store_true",
default=False)
options = parser.parse_args()
options.logLevel = "DEBUG"
#options.clean = "always"
options.targetTime = 1
if options.cathcode is not None:
options.cathcode = [c.split(".") for c in options.cathcode]
sfam_file = os.path.abspath("cath.h5")
if not os.path.isfile(sfam_file):
store = IOStore.get("aws:us-east-1:molmimic-cath")
store.read_input_file("cath-domain-description-file-small.h5", sfam_file)
with Toil(options) as workflow:
if not workflow.options.restart:
cathFileStoreID = workflow.importFile("file://" + os.path.abspath(sfam_file))
job = Job.wrapJobFn(start_toil, cathFileStoreID, cathcode=options.cathcode,
update_features=options.features, force=options.force)
workflow.start(job)
else:
workflow.restart()
return
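# For context: a file ID returned by workflow.importFile() is normally read back inside a
# job body through the job's file store. A minimal, illustrative sketch; the function name
# and its use here are assumptions, not code from the project.
def use_cath_file(job, cathFileStoreID):
    # Copy the imported file from the job store onto this worker's local disk.
    local_path = job.fileStore.readGlobalFile(cathFileStoreID)
    return local_path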
if __name__ == "__main__":
from toil.common import Toil
from toil.job import Job
parser = Job.Runner.getDefaultArgumentParser()
options = parser.parse_args()
options.logLevel = "DEBUG"
options.clean = "always"
dataset_name = options.jobStore.split(":")[-1]
print "Running"
job = Job.wrapJobFn(start_toil, dataset_name)
with Toil(options) as toil:
toil.start(job)
def main():
    """Reports stats on the workflow; use with the --stats option to toil."""
    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
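# Typical command-line usage for the stats report (the job store locator is only an
# example; exact flags can vary by Toil version): run the workflow with --stats and keep
# the job store around, then ask for the report.
#
#   python my_workflow.py file:my-job-store --stats --clean=never
#   toil stats file:my-job-store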
    # cores=max_cores)

    del sdoms
    os.remove(sdoms_file)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"

    job = Job.wrapJobFn(start_toil)
    with Toil(options) as toil:
        toil.start(job)
def runCactusProgressive(options):
    with Toil(options) as toil:
        importSingularityImage(options)
        # Run the workflow
        if options.restart:
            halID = toil.restart()
        else:
            options.cactusDir = getTempDirectory()
            # Create the progressive cactus project
            projWrapper = ProjectWrapper(options)
            projWrapper.writeXml()

            pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                                  '%s_project.xml' % ProjectWrapper.alignmentDirName)
            assert os.path.exists(pjPath)

            project = MultiCactusProject()
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of the job store used by the workflow whose jobs should "
                             "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    config.jobStore = config.jobStore[5:] if config.jobStore.startswith('file:') else config.jobStore

    # ':' means an aws/google jobstore; use the old (broken?) method
    if ':' in config.jobStore:
        jobStore = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken src: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        batchSystem = Toil.createBatchSystem(jobStore.config)  # Should automatically kill existing jobs, so we're good.
        for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
            batchSystem.killBatchJobs(jobID)
        logger.info("All jobs SHOULD have been killed")
    # otherwise, kill the pid recorded in the jobstore
    else:
        pid_log = os.path.join(os.path.abspath(config.jobStore), 'pid.log')
        with open(pid_log, 'r') as f:
            pid2kill = f.read().strip()
        try:
            os.kill(int(pid2kill), signal.SIGKILL)
            logger.info("Toil process %s successfully terminated." % str(pid2kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid2kill))
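# Corresponding command-line usage (the job store locator is only an example):
#
#   toil kill file:my-job-store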
    parser.add_argument('--num-lines', default=1000, help='Number of lines in file to sort.', type=int)
    parser.add_argument('--line-length', default=50, help='Length of lines in file to sort.', type=int)
    parser.add_argument("--N",
                        help="The threshold below which a serial sort function is used to sort the file. "
                             "All lines must be of length less than or equal to N or the program will fail",
                        default=10000)
    options = parser.parse_args()

    if int(options.N) <= 0:
        raise RuntimeError("Invalid value of N: %s" % options.N)

    file_name = 'file_to_sort.txt'
    make_file_to_sort(file_name=file_name, lines=options.num_lines, line_length=options.line_length)

    with Toil(options) as toil:
        sort_file_url = 'file://' + os.path.abspath('file_to_sort.txt')
        if not toil.options.restart:
            sort_file_id = toil.importFile(sort_file_url)
            sorted_file_id = toil.start(Job.wrapJobFn(setup, sort_file_id, int(options.N), False, memory='600M'))
        else:
            sorted_file_id = toil.restart()
        toil.exportFile(sorted_file_id, sort_file_url)
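# The sort example above calls make_file_to_sort() without showing it. A minimal sketch of
# what such a helper could look like; the random lowercase content is an assumption, not
# necessarily what the original example generates.
import random
import string

def make_file_to_sort(file_name, lines, line_length):
    # Write `lines` rows of random lowercase characters, each `line_length` long.
    with open(file_name, 'w') as f:
        for _ in range(lines):
            line = ''.join(random.choice(string.ascii_lowercase) for _ in range(line_length))
            f.write(line + '\n')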
def runCactusWorkflow(args):
    ##########################################
    # Construct the arguments.
    ##########################################
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    options = parser.parse_args(args)
    options.disableCaching = True
    setLoggingFromOptions(options)

    experimentWrapper = ExperimentWrapper(ET.parse(options.experimentFile).getroot())
    with Toil(options) as toil:
        seqIDMap = dict()
        for name in experimentWrapper.getGenomesWithSequence():
            fullSeq = getTempFile()
            seq = experimentWrapper.getSequenceID(name)
            if os.path.isdir(seq):
                catFiles([os.path.join(seq, seqFile) for seqFile in os.listdir(seq)], fullSeq)
            else:
                fullSeq = seq
            experimentWrapper.setSequenceID(name, toil.importFile(makeURL(fullSeq)))
            print(name, experimentWrapper.getSequenceID(name))
        experimentWrapper.writeXML(options.experimentFile)

        configNode = ET.parse(experimentWrapper.getConfigPath()).getroot()
        print(seqIDMap)
        cactusWorkflowArguments = CactusWorkflowArguments(options, experimentFile=options.experimentFile, configNode=configNode, seqIDMap=seqIDMap)
def workerCleanup(info):
    """
    Cleans up the worker node on batch system shutdown. Also see :meth:`supportsWorkerCleanup`.

    :param WorkerCleanupInfo info: A named tuple consisting of all the relevant information
           for cleaning up the worker.
    """
    assert isinstance(info, WorkerCleanupInfo)
    workflowDir = Toil.getWorkflowDir(info.workflowID, info.workDir)
    DeferredFunctionManager.cleanupWorker(workflowDir)
    workflowDirContents = os.listdir(workflowDir)
    AbstractFileStore.shutdownFileStore(workflowDir, info.workflowID)
    # Remove the workflow directory when cleanWorkDir is 'always', or when it is
    # 'onSuccess'/'onError' and the directory holds nothing but the (optional) cache dir.
    # Note that `and` binds tighter than `or` in this condition.
    if (info.cleanWorkDir == 'always'
        or info.cleanWorkDir in ('onSuccess', 'onError')
        and workflowDirContents in ([], [cacheDirName(info.workflowID)])):
        shutil.rmtree(workflowDir, ignore_errors=True)