How to use the toil.lib.bioio.logger object in toil

To help you get started, we've selected a few toil examples based on popular ways toil.lib.bioio.logger is used in public projects.
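All of the snippets below rely on the same pattern: importing the module-level logger once and calling the standard logging methods on it. A minimal sketch, assuming a toil version in which toil.lib.bioio exposes a logging.Logger named logger (the messages here are made up):

from toil.lib.bioio import logger

logger.info("Starting pipeline step")
logger.debug("Ports in use: %s", {8080, 9090})
logger.warning("Falling back to a random port")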

github ComparativeGenomicsToolkit / cactus / src / cactus / shared / common.py
makeEventHeadersAlphaNumeric=False):
    logLevel = getLogLevelString2(logLevel)
    # We pass in the genome->sequence map as a series of paired arguments: [genome, faPath]*N.
    pairs = [[genome, faPath] for genome, faPath in seqMap.items()]
    args = [item for sublist in pairs for item in sublist]

    args += ["--speciesTree", newickTreeString, "--cactusDisk", cactusDiskDatabaseString,
            "--logLevel", logLevel]
    if makeEventHeadersAlphaNumeric:
        args += ["--makeEventHeadersAlphaNumeric"]
    if outgroupEvents:
        args += ["--outgroupEvents", " ".join(outgroupEvents)]
    masterMessages = cactus_call(check_output=True,
                                 parameters=["cactus_setup"] + args)

    logger.info("Ran cactus setup okay")
    return [ i for i in masterMessages.split("\n") if i != '' ]
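The pair-flattening idiom at the top of this snippet is easiest to see with toy data. A standalone sketch (the genome names and FASTA paths are made up):

seqMap = {"human": "human.fa", "mouse": "mouse.fa"}
pairs = [[genome, faPath] for genome, faPath in seqMap.items()]
args = [item for sublist in pairs for item in sublist]
# args is now ["human", "human.fa", "mouse", "mouse.fa"]:
# each genome name is immediately followed by its FASTA path.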
github ComparativeGenomicsToolkit / cactus / src / cactus / pipeline / DBserverControl.py
def findOccupiedPorts():
    """Attempt to find all currently taken TCP ports.
    Returns a set of ints, representing taken ports."""
    netstatOutput = cactus_call(parameters=["netstat", "-tuplen"], check_output=True)
    ports = set()
    for line in netstatOutput.split("\n"):
        fields = line.split()
        if len(fields) != 9:
            # Header or other garbage line
            continue
        port = int(fields[3].split(':')[-1])
        ports.add(port)
    logger.debug('Detected ports in use: %s' % repr(ports))
    return ports
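A hedged usage sketch showing how a helper like this feeds port selection; the 1025-65535 range is illustrative (a later snippet on this page does the same thing against _MAX_DBSERVER_PORT):

import random
from toil.lib.bioio import logger  # assumes the module-level logger shown above

occupiedPorts = findOccupiedPorts()  # helper defined in the snippet above
unoccupiedPorts = set(range(1025, 65536)) - occupiedPorts
port = random.choice(list(unoccupiedPorts))
logger.debug("Chose free port %d", port)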
github ComparativeGenomicsToolkit / cactus / src / cactus / pipeline / cactus_workflow.py
fileStore=fileStore, calculateWhichEndsToComputeSeparately=True):
            endToAlign, sequencesInEndAlignment, basesInEndAlignment = line.split()
            sequencesInEndAlignment = int(sequencesInEndAlignment)
            basesInEndAlignment = int(basesInEndAlignment)

            # Sanity check
            assert len(endsToAlign) == len(endSizes), "End alignments and size lists out of sync"

            #If we have a really big end, align it separately
            if basesInEndAlignment >= veryLargeEndSize:
                alignmentID = self.addChild(CactusBarEndAlignerWrapper(self.phaseNode, self.constantsNode,
                                                        self.cactusDiskDatabaseString, self.flowerNames,
                                                        self.flowerSizes, True, [ endToAlign ], [ basesInEndAlignment ],
                                                        cactusWorkflowArguments=self.cactusWorkflowArguments)).rv()
                precomputedAlignmentIDs.append(alignmentID)
                logger.info("Precomputing very large end alignment for %s with %i caps and %i bases" % \
                             (endToAlign, sequencesInEndAlignment, basesInEndAlignment))
            else:
                endsToAlign.append(endToAlign)
                endSizes.append(basesInEndAlignment)
                if sum(endSizes) >= maxFlowerGroupSize:
                    alignmentID = self.addChild(CactusBarEndAlignerWrapper(self.phaseNode,
                                                       self.constantsNode,
                                                       self.cactusDiskDatabaseString,
                                                       self.flowerNames, self.flowerSizes, False,
                                                       endsToAlign, endSizes,
                                                       cactusWorkflowArguments=self.cactusWorkflowArguments)).rv()
                    precomputedAlignmentIDs.append(alignmentID)
                    endsToAlign = []
                    endSizes = []
        if len(endsToAlign) > 0:
            precomputedAlignmentIDs.append(self.addChild(CactusBarEndAlignerWrapper(
github ComparativeGenomicsToolkit / cactus / src / cactus / progressive / cactus_progressive.py
# image is pulled.
            # NOTE: singularity writes images in the current directory only
            #       when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            try:
                check_call(["singularity", "pull", "--size", "2000", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            except CalledProcessError:
                # Call failed, try without --size, required for singularity 3+
                check_call(["singularity", "pull", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            os.chdir(oldCWD)
        else:
            logger.info("Using pre-built singularity image: '{}'".format(imgPath))
github ComparativeGenomicsToolkit / cactus / src / cactus / pipeline / DBserverControl.py
down the DB and save the results. After finishing, the data will
    eventually be written to snapshotFile.

    Returns a tuple containing an updated version of the database config dbElem and the
    path to the log file.
    """
    logPath = fileStore.getLocalTempFile()
    dbElem.setDbHost(getHostName())

    # Find a suitable port to run on.
    try:
        occupiedPorts = findOccupiedPorts()
        unoccupiedPorts = set(range(1025, _MAX_DBSERVER_PORT)) - occupiedPorts
        port = random.choice(list(unoccupiedPorts))
    except Exception:
        logger.warning("Can't find which ports are occupied--likely netstat is not installed."
                       " Choosing a random port to start the DB on, good luck!")
        port = random.randint(1025, _MAX_DBSERVER_PORT)
    dbElem.setDbPort(port)

    process = DBServerProcess(dbElem, logPath, fileStore, existingSnapshotID, snapshotExportID)
    process.daemon = True
    process.start()

    if not blockUntilDBserverIsRunning(dbElem):
        raise RuntimeError("Unable to launch DBserver in time.")

    return process, dbElem, logPath
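The definition line of this launcher is cut off above. Purely as a call sketch, with the name startDBServer, the keyword arguments, and the getter names (assumed to mirror the setDbHost/setDbPort setters in the snippet) all invented for illustration:

# Hypothetical names: the real function and its signature are truncated above.
process, dbElem, logPath = startDBServer(dbElem, fileStore,
                                         existingSnapshotID=None,
                                         snapshotExportID=None)
logger.info("DB server up on %s:%d, log at %s",
            dbElem.getDbHost(), dbElem.getDbPort(), logPath)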
github ComparativeGenomicsToolkit / cactus / src / cactus / repeats / cactus_repeats.py
def run(self, fileStore):
        logger.info("Raising sampling rate from %f to %f" % (self.oldSamplingRate, self.newSamplingRate))
        coveredSeedsFiles = [os.path.basename(fileStore.readGlobalFile(coveredSeedsID)) for coveredSeedsID in
                self.coveredSeedsIDs]
        samplingRates = fileStore.readGlobalFile(self.samplingRatesID)
        newSamplingRates = os.path.basename(fileStore.getLocalTempFile())
        logger.info("Work dir = %s" % os.path.dirname(samplingRates))
        cactus_call(outfile=newSamplingRates, parameters=["cactus_adjustSamplingRates",
                                                          "--oldSamplingRate", str(self.oldSamplingRate),
                                                          "--newSamplingRate", str(self.newSamplingRate),
                                                          "--oldSamplingRates", os.path.basename(samplingRates),
                                                          "--coveredSeedsFiles", ",".join(coveredSeedsFiles)])

        return fileStore.writeGlobalFile(newSamplingRates)
github ComparativeGenomicsToolkit / cactus / src / cactus / shared / common.py
else:
            # Too many children, so we have to build a tree to avoid
            # bottlenecking on consistently serializing all the jobs.
            for job in self.queuedChildJobs:
                job.prepareForPromiseRegistration(fileStore.jobStore)

            curLevel = self.queuedChildJobs
            while len(curLevel) > self.maxChildrenPerJob:
                curLevel = [curLevel[i:i + self.maxChildrenPerJob] for i in range(0, len(curLevel), self.maxChildrenPerJob)]
            # curLevel is now a nested list (of lists, of lists...)
            # representing a tree of out-degree no higher than
            # maxChildrenPerJob. We can pass that to SpawnChildren
            # instances, which will run down the tree and eventually
            # spawn the jobs we're actually interested in running.
            for sublist in curLevel:
                logger.debug(sublist)
                assert isinstance(sublist, list)
                super(ChildTreeJob, self).addChild(SpawnChildren(sublist))
        return ret
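The while loop above is the whole trick: it repeatedly groups the child list until the top level is small enough. A toy illustration with plain ints and maxChildrenPerJob = 3:

maxChildrenPerJob = 3
curLevel = list(range(10))
while len(curLevel) > maxChildrenPerJob:
    curLevel = [curLevel[i:i + maxChildrenPerJob]
                for i in range(0, len(curLevel), maxChildrenPerJob)]
# curLevel == [[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9]]]
# Every list along the way has at most maxChildrenPerJob entries, so no
# single job ever has to serialize more than that many children at once.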
github ComparativeGenomicsToolkit / cactus / src / cactus / preprocessor / cactus_preprocessor.py
inSequence = fileStore.readGlobalFile(self.inSequenceID)

        if self.prepOptions.chunkSize <= 0:
            # In this first case we don't need to break up the sequence
            chunked = False
            inChunkList = [inSequence]
        else:
            # chunk it up
            chunked = True
            inChunkDirectory = getTempDirectory(rootDir=fileStore.getLocalTempDir())
            inChunkList = runGetChunks(sequenceFiles=[inSequence], chunksDir=inChunkDirectory,
                                       chunkSize=self.prepOptions.chunkSize,
                                       overlapSize=0)
            inChunkList = [os.path.abspath(path) for path in inChunkList]
        logger.info("Chunks = %s" % inChunkList)

        inChunkIDList = [fileStore.writeGlobalFile(chunk, cleanup=True) for chunk in inChunkList]
        outChunkIDList = []
        #For each input chunk we create an output chunk; it is the output chunks that get concatenated together.
        if not self.chunksToCompute:
            self.chunksToCompute = range(len(inChunkList))
        for i in self.chunksToCompute:
            #Calculate the number of chunks to use
            inChunkNumber = int(max(1, math.ceil(len(inChunkList) * self.prepOptions.proportionToSample)))
            assert inChunkNumber <= len(inChunkList) and inChunkNumber > 0
            #Now get the list of chunks flanking and including the current chunk
            j = max(0, i - inChunkNumber//2)
            inChunkIDs = inChunkIDList[j:j+inChunkNumber]
            if len(inChunkIDs) < inChunkNumber: #This logic is like making the list circular
                inChunkIDs += inChunkIDList[:inChunkNumber-len(inChunkIDs)]
            assert len(inChunkIDs) == inChunkNumber
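The wrap-around slice at the end is easiest to follow with a worked example; a sketch with four toy chunk IDs, i = 3 and inChunkNumber = 3:

inChunkIDList = ["c0", "c1", "c2", "c3"]
i, inChunkNumber = 3, 3
j = max(0, i - inChunkNumber//2)                  # j == 2
inChunkIDs = inChunkIDList[j:j + inChunkNumber]   # ["c2", "c3"], too short
if len(inChunkIDs) < inChunkNumber:
    inChunkIDs += inChunkIDList[:inChunkNumber - len(inChunkIDs)]
# inChunkIDs == ["c2", "c3", "c0"]: the window wraps around to the start.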
github ComparativeGenomicsToolkit / cactus / src / cactus / repeats / cactus_repeats.py
def run(self, fileStore):
        samplingRates = fileStore.readGlobalFile(self.samplingRatesID)
        chunk1 = fileStore.readGlobalFile(self.chunkID1)
        chunk2 = fileStore.readGlobalFile(self.chunkID2)
        lastZSequenceHandling = ['%s[multiple,unmask][nameparse=darkspace]' % os.path.basename(chunk1),
                                 '%s[nameparse=darkspace][unmask]' % os.path.basename(chunk2)]
        alignments = fileStore.getLocalTempFile()
        logger.info("Work dir = %s" % os.path.dirname(chunk1))
        cactus_call(outfile=alignments,
                    parameters=["cPecanLastz"] + lastZSequenceHandling +
                                ["--samplingRates=%s" % os.path.basename(samplingRates),
                                "--notrivial",
                                "--format=general:name1,zstart1,end1,name2,zstart2+,end2+", "--markend"])

        coveredSeeds = fileStore.getLocalTempFile()
        cactus_call(outfile=coveredSeeds,
                    parameters=["cactus_coveredSeeds",
                                "--seq", chunk1,
                                "--alignments", alignments])
        return fileStore.writeGlobalFile(coveredSeeds)
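The snippets above all share one shape: a Job.run(self, fileStore) method that localizes global files, logs progress through the module-level logger, and writes results back to the job store. A minimal self-contained sketch of that shape (the job and its behavior are made up; assumes a toil version exposing toil.lib.bioio.logger):

import os

from toil.job import Job
from toil.lib.bioio import logger

class PassThroughJob(Job):
    """Hypothetical job: localize an input file, log it, re-export it."""
    def __init__(self, inputID):
        Job.__init__(self)
        self.inputID = inputID

    def run(self, fileStore):
        localPath = fileStore.readGlobalFile(self.inputID)
        logger.info("Work dir = %s", os.path.dirname(localPath))
        return fileStore.writeGlobalFile(localPath)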