Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
flowcellFastqDir
flowcellBamDir
calls:
FastqPairToBwaBamFlow
supplies:
bamFile
fastq1File
fastq2File
"""
#
# 1. separate fastqs into matching pairs:
#
# Map from (project, sample, index, lane, num) -> [read1Path, read2Path];
# the read number is deliberately excluded from the key so both reads of a
# pair land in the same entry. (Fragment: pair assignment continues past
# this excerpt.)
fqs = {}
# FileDigger walks flowcellFastqDir looking for *.fastq.gz files below
# "Project_*"/"Sample_*" directory levels.
fqDigger = FileDigger(".fastq.gz", ["Project_", "Sample_"])
for (project, sample, fqPath) in fqDigger.getNextFile(self.params.flowcellFastqDir) :
# Optional sample whitelist: when sampleNameList is set and non-empty,
# skip any sample not listed.
if (self.params.sampleNameList != None) and \
(len(self.params.sampleNameList) != 0) and \
(sample not in self.params.sampleNameList) : continue
fqFile = os.path.basename(fqPath)
# Expected CASAVA-style name: sample_index_lane_read_num.fastq.gz
# (exactly 5 underscore-separated fields before the first dot).
w = (fqFile.split(".")[0]).split("_")
if len(w) != 5 :
raise Exception("Unexpected fastq filename format: '%s'" % (fqPath))
(sample2, index, lane, read, num) = w
# The sample encoded in the filename must agree with the Sample_ directory.
if sample != sample2 :
# NOTE(review): the trailing ";" inside this message string looks like a
# typo for a closing "'" -- confirm before changing runtime text.
raise Exception("Fastq name sample disagrees with directory sample: '%s;" % (fqPath))
key = (project, sample, index, lane, num)
if key not in fqs : fqs[key] = [None, None]
# Entry point for the mutex demo: build the workflow and run it on the
# local machine, propagating the workflow status as the exit code.
def main() :
# Instantiate the workflow
wflow = MutexWorkflow()
# Run the workflow:
retval = wflow.run(mode="local", nCores=6)
sys.exit(retval)
# NOTE(review): the two lines below are unreachable after sys.exit() above and
# reference 'hardFlush'/'exitval', which are not defined in this fragment --
# they appear to belong to a different script; confirm before relying on them.
hardFlush(sys.stderr)
sys.exit(exitval)
def checkExpectArgCount(expectArgCount) :
    """Validate the command-line argument count.

    Returns silently when len(sys.argv) equals *expectArgCount*; otherwise
    hands a formatted error message to badUsage() (presumably reporting the
    problem and terminating -- defined elsewhere in this file).
    """
    observed = len(sys.argv)
    if observed != expectArgCount :
        badUsage("Incorrect argument count, expected: %i observed: %i\n" % (expectArgCount, observed))
# Task-wrapper startup. Positional command line: argv[1]=run id,
# argv[2]=task label, argv[3]=pickled parameter file. Defaults below let
# early logging work even before arguments are validated.
runid = "unknown"
taskStr = "unknown"
if len(sys.argv) > 2 :
runid = sys.argv[1]
taskStr = sys.argv[2]
bling = StringBling(runid, taskStr)
# send a signal for wrapper start as early as possible to help ensure hostname is logged
pffp = sys.stderr
bling.wrapperLog(pffp, "[wrapperSignal] wrapperStart")
checkExpectArgCount(4)
picklefile = sys.argv[3]
# try multiple times to read the argument file in case of NFS delay:
#
retryDelaySec = 30
maxTrials = 3
# Poll for the pickle file; after maxTrials attempts control falls through
# regardless, so a still-missing file will fail loudly at the later open.
for _ in range(maxTrials) :
if os.path.exists(picklefile) : break
time.sleep(retryDelaySec)
# NOTE(review): fragment of a workflow() method -- 'cwd' is defined earlier,
# outside this excerpt.
parentdir=os.path.abspath(os.path.join(cwd,".."))
self.flowLog("testing pyflow cwd: '%s' parentdir: '%s'" % (cwd,parentdir))
# task will fail unless pwd == parentdir:
#
# test both absolute and relative cwd arguments:
#
# Each task's shell test succeeds only when the task actually runs in
# parentdir; one passes it absolutely, the other relatively as "..".
self.addTask("testAbsCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd=parentdir)
self.addTask("testRelCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd="..")
# Build the cwd-test demo workflow and execute it on the local machine;
# the process exit status reflects workflow success or failure.
cwdDemo = CwdWorkflow()
exitStatus = cwdDemo.run(mode="local")
sys.exit(exitStatus)
# Task succeeds only if FOO=BAZ is visible, i.e. the augmented environment
# passed via env= actually reached the task shell.
self.addTask("testAugmentedEnv", "[ $FOO == 'BAZ' ]; exit $?", env=augmented_env)
# test funny characters that have shown to cause trouble on some sge installations
funky_env = {}
funky_env["PATH"] = "/bin"
funky_env["_"] = "| %s %F \n"
# in this case we just want the job to run at all:
self.addTask("testFunkyEnv", "echo 'foo'; exit $?", env=funky_env)
# Sanity check: FOO must not already exist in the parent interpreter's
# environment -- it should appear only inside tasks given the augmented env.
assert "FOO" not in os.environ

# Build the environment-test workflow and run it locally, exiting with its
# return status.
envDemo = EnvWorkflow()
sys.exit(envDemo.run(mode="local"))
class HelloWorkflow(WorkflowRunner) :
    """Minimal pyflow demo: a single task writing a greeting to a file.

    A workflow is defined by overloading the WorkflowRunner.workflow()
    method.
    """

    def workflow(self) :
        # The task's output lands in helloWorld.out.txt via the shell
        # redirection embedded in the command itself.
        self.addTask("easy_task1", "echo 'Hello World!' > helloWorld.out.txt")
# Build the hello-world workflow and run it with default settings.
helloFlow = HelloWorkflow()
status = helloFlow.run()
# done! -- propagate workflow success/failure as the process exit code.
sys.exit(status)
# NOTE(review): fragment of a polling loop inside workflow() --
# updateCompletedTasks/allTasks/completedTasks/completedWork/maxTaskCount
# are defined outside this excerpt.
updateCompletedTasks()
runningTaskCount = len(allTasks)-len(completedTasks)
self.flowLog("Completed/Running tasks: %i %i" % (len(completedTasks), runningTaskCount))
assert(runningTaskCount >= 0)
# launch new tasks until it is clear the total threshold will be met
if completedWork < self.totalContinuousWorkTarget :
# Top the running set back up to maxTaskCount workers.
numberOfTasksToLaunch = max(maxTaskCount-runningTaskCount,0)
for _ in range(numberOfTasksToLaunch) : launchNextTask()
# Poll interval between scheduling rounds:
time.sleep(5)
# Build the launch-until demo workflow and run it locally on 8 cores,
# exiting with the workflow's return status.
launchDemo = LaunchUntilWorkflow()
exitStatus = launchDemo.run(mode="local",nCores=8)
sys.exit(exitStatus)
# completion of task1.
# pyflow will switch the task command between make and qmake
# depending on run type.
#
# isCommandMakePath=True marks scriptDir as a makefile directory to build
# rather than an ordinary shell command string.
self.addTask("make_task", scriptDir, isCommandMakePath=True, nCores=2, dependencies="task1")
# This command 'unconfigures' the makefile
#
self.addTask("task2", "rm -f %s/Makefile" % scriptDir, dependencies="make_task")
# Build the make-task demo workflow (parameters are passed into the
# workflow via its constructor) and run it locally on 8 cores.
makeDemo = MakeWorkflow()
exitStatus = makeDemo.run(mode="local", nCores=8)
sys.exit(exitStatus)
# but this is site-configurable in pyflowConfig.py, so we
# specify it for every task here
#
# This works correctly if task 4 is the only task run in
# parallel with one of the other 3 tasks.
#
self.addTask("task1", "echo 'Hello World!'", memMb=2048)
self.addTask("task2", "echo 'Hello World!'", memMb=2048)
self.addTask("task3", "echo 'Hello World!'", memMb=2048)
# task4's tiny memory request means only it can fit alongside one of the
# 2048Mb tasks under the workflow-wide memory limit set at run() time.
self.addTask("task4", "echo 'Hello World!'", memMb=1)
# Build the memory-limit demo and run it with 8 cores but only 2049Mb,
# so at most one 2048Mb task can be scheduled at any moment.
memDemo = MemTestWorkflow()
exitStatus = memDemo.run(nCores=8, memMb=2049)
# done!
sys.exit(exitStatus)
# NOTE(review): tail of an __init__ fragment -- the enclosing class header
# lies outside this excerpt.
self.taskIndex=taskIndex
self.inputFile=inputFile
# NOTE(review): DeepCopyProtector presumably shields the shared accumulator
# from being deep-copied along with the workflow object, so every task
# updates one shared instance -- confirm against its definition.
self.nocopy = DeepCopyProtector()
self.nocopy.totalWorkCompleted = totalWorkCompleted
def workflow(self) :
    """Consume this worker task's input file.

    Reads a single integer from self.inputFile, logs it, deletes the file,
    and records the value in the shared accumulator keyed by task index.
    """
    import os

    # Use a context manager so the handle is closed even if the content
    # fails to parse as an integer (the original leaked the handle on error).
    with open(self.inputFile, "rb") as infp :
        value = int(infp.read().strip())

    self.flowLog("File: %s Value: %i" % (self.inputFile, value))

    # The input file is single-use; remove it once consumed.
    os.remove(self.inputFile)

    # addOrderedValue records the value at this task's index so results stay
    # ordered even when tasks complete out of order.
    self.nocopy.totalWorkCompleted.addOrderedValue(self.taskIndex, value)
# Workflow that keeps launching worker tasks until a total amount of work
# has accumulated. (Fragment: workflow()/launchNextTask() continue past
# this excerpt.)
class LaunchUntilWorkflow(WorkflowRunner) :
def __init__(self):
# NOTE(review): WorkflowRunner.__init__ is not invoked here -- confirm the
# base class tolerates being skipped.
# Stop launching once this much total work has been reported by workers.
self.totalContinuousWorkTarget = 100
def workflow(self):
taskByIndex = []
allTasks = set()
completedTasks = set()
# Shared, synchronized accumulator of per-task work values.
totalWorkCompleted = SyncronizedAccumulator()
# Launch one more worker task, labeled/numbered by the current task count.
def launchNextTask() :
taskIndex = len(allTasks)
workerTaskLabel = "workerTask_%05i" % (taskIndex)
workerTaskFile = "outputFile_%05i" % (taskIndex)
workerTaskCmd=[sys.executable, workerJob, workerTaskFile]