How to use the jcvi.apps.base.mkdir function in jcvi

To help you get started, we’ve selected a few jcvi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / jcvi / jcvi / variation / deconvolute.py View on Github external
%prog merge folder1 ...

    Consolidate split contents in the folders. The folders can be generated by
    the split() process and several samples may be in separate fastq files. This
    program merges them.
    """
    p = OptionParser(merge.__doc__)
    p.set_outdir(outdir="outdir")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    folders = args
    outdir = opts.outdir
    mkdir(outdir)

    files = flatten(glob("{0}/*.*.fastq".format(x)) for x in folders)
    files = list(files)
    key = lambda x: op.basename(x).split(".")[0]
    files.sort(key=key)
    for id, fns in groupby(files, key=key):
        fns = list(fns)
        outfile = op.join(outdir, "{0}.fastq".format(id))
        FileMerger(fns, outfile=outfile).merge(checkexists=True)
github tanghaibao / jcvi / formats / sff.py View on Github external
Assemble each BAC separately using newbler.
    """
    from jcvi.formats.fasta import join

    p = OptionParser(assemble.__doc__)
    p.add_option("--overwrite", default=False, action="store_true",
            help="Overwrite the separate BAC assembly [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    sffdir, = args
    asmdir = "newbler"
    fastadir = "fasta"
    mkdir(asmdir, overwrite=opts.overwrite)
    mkdir(fastadir, overwrite=opts.overwrite)
    cmd = "runAssembly -cpu 8 -o {0} {1}"
    for sffile in glob("{0}/*.sff".format(sffdir)):
        pf = op.basename(sffile).split(".")[1]
        pf = pf.lower()
        outdir = op.join(asmdir, pf)
        if op.exists(outdir):
            logging.debug("`{0}` exists. Ignored.".format(outdir))
            continue

        acmd = cmd.format(outdir, sffile)
        sh(acmd)

        ctgfile = op.join(outdir, "454LargeContigs.fna")
        if not op.exists(ctgfile):  # newbler failure
            logging.error("File `{0}` not found (newbler failure).".\
github tanghaibao / jcvi / jcvi / assembly / preprocess.py View on Github external
if len(args) < 1:
        sys.exit(not p.print_help())

    fastq = args
    tag, tagj, taglj = "frag_reads", "jump_reads", "long_jump_reads"

    ploidy = opts.ploidy
    haploidify = opts.haploidify
    suffix = opts.suffix
    assert (not haploidify) or (haploidify and ploidy == '2')

    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = opts.dir
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)
    phred64 = (guessoffset([args[0]]) == 64)

    orig = datadir + "/{0}_orig".format(tag)
    origfastb = orig + ".fastb"
    if need_update(fastq, origfastb):
        cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}' PLOIDY={2}".\
                format(fullpath, opts.cpus, ploidy)
        if phred64:
            cmd += " PHRED_64=True"
        sh(cmd)

    if op.exists(origfastb):
        correct_frag(datadir, tag, origfastb, nthreads, dedup=opts.fragsdedup,
                     haploidify=haploidify, suffix=suffix)
github tanghaibao / jcvi / jcvi / assembly / unitig.py View on Github external
def error(args):
    """
    %prog error version backup_folder

    Find all errors in ../5-consensus/*.err and pull the error unitigs into
    backup/ folder.
    """
    p = OptionParser(error.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    version, backup_folder = args
    mkdir(backup_folder)

    fw = open("errors.log", "w")

    seen = set()
    for g in glob("../5-consensus/*.err"):
        if "partitioned" in g:
            continue

        fp = open(g)
        partID = op.basename(g).rsplit(".err", 1)[0]
        partID = int(partID.split("_")[-1])

        for row in fp:
            if row.startswith(working):
                unitigID = row.split("(")[0].split()[-1]
                continue
github tanghaibao / jcvi / jcvi / assembly / postprocess.py View on Github external
rid = list(Fasta(ctgfasta).iterkeys())
    assert len(rid) == 1, "Use overlapbatch() to improve multi-FASTA file"

    rid = rid[0]
    splitctgfasta = ctgfasta.rsplit(".", 1)[0] + ".split.fasta"
    ctgfasta = run_gapsplit(infile=ctgfasta, outfile=splitctgfasta)

    # Run BLAST
    blastfile = ctgfasta + ".blast"
    run_megablast(infile=ctgfasta, outfile=blastfile, db=poolfasta)

    # Extract contigs and merge using minimus2
    closuredir = prefix + ".closure"
    closure = False
    if need_update(blastfile, closuredir):
        mkdir(closuredir, overwrite=True)
        closure = True

    if closure:
        idsfile = op.join(closuredir, prefix + ".ids")
        cmd = "cut -f2 {0} | sort -u".format(blastfile)
        sh(cmd, outfile=idsfile)

        idsfastafile = op.join(closuredir, prefix + ".ids.fasta")
        cmd = "faSomeRecords {0} {1} {2}".format(poolfasta, idsfile, idsfastafile)
        sh(cmd)

        # This step is a hack to weight the bases from original sequences more
        # than the pulled sequences, by literally adding another copy to be used
        # in consensus calls.
        redundantfastafile = op.join(closuredir, prefix + ".redundant.fasta")
        format([ctgfasta, redundantfastafile, "--prefix=RED."])
github tanghaibao / jcvi / jcvi / projects / synfind.py View on Github external
def write_lst(bedfile):
    pf = op.basename(bedfile).split(".")[0]
    mkdir(pf)
    bed = Bed(bedfile)
    stanza = []
    for seqid, bs in bed.sub_beds():
        fname = op.join(pf, "{0}.lst".format(seqid))
        fw = open(fname, "w")
        for b in bs:
            print("{0}{1}".format(b.accn.replace(" ", ""), b.strand), file=fw)
        stanza.append((seqid, fname))
        fw.close()
    return pf, stanza
github tanghaibao / jcvi / jcvi / projects / str.py View on Github external
a2 = row[tred + ".2"]
        fdp = row[tred + ".FDP"]
        pdp = row[tred + ".PDP"]
        pedp = row[tred + ".PEDP"]
        dp[sk] = (a1, a2, fdp, pdp, pedp)

    # Build a consolidated dataframe
    ef = pd.DataFrame.from_dict(dp, orient="index")
    ef.columns = [tred + ".1", tred + ".2", tred + ".FDP",
                  tred + ".PDP", tred + ".PEDP"]
    ef.index.name = "SampleKey"
    mf = df.merge(ef, how="right", left_index=True, right_index=True)

    # Plot a bunch of figures
    outdir = "output"
    mkdir(outdir)
    xlim = ylim = (0, 100)
    draw_jointplot(outdir + "/A", "MeanCoverage", "HD.FDP",
                   data=mf, xlim=xlim, ylim=ylim, format=format)
    draw_jointplot(outdir + "/B", "MeanCoverage", "HD.PDP",
                   data=mf, color='g', xlim=xlim, ylim=ylim, format=format)
    draw_jointplot(outdir + "/C", "MeanCoverage", "HD.PEDP",
                   data=mf, color='m', xlim=xlim, ylim=ylim, format=format)

    xlim = (0, 50)
    draw_jointplot(outdir + "/D", "HD.2", "HD.FDP",
                   data=mf, xlim=xlim, ylim=ylim, format=format)
    draw_jointplot(outdir + "/E", "HD.2", "HD.PDP",
                   data=mf, color='g', xlim=xlim, ylim=ylim, format=format)
    draw_jointplot(outdir + "/F", "HD.2", "HD.PEDP",
                   data=mf, color='m', xlim=xlim, ylim=ylim, format=format)
github tanghaibao / jcvi / jcvi / apps / phylo.py View on Github external
def build_ml_raxml(alignment, outfile, work_dir=".", **kwargs):
    """
    build maximum likelihood tree of DNA seqs with RAxML
    """
    work_dir = op.join(work_dir, "work")
    mkdir(work_dir)
    phy_file = op.join(work_dir, "aln.phy")
    AlignIO.write(alignment, file(phy_file, "w"), "phylip-relaxed")

    raxml_work = op.abspath(op.join(op.dirname(phy_file), "raxml_work"))
    mkdir(raxml_work)
    raxml_cl = RaxmlCommandline(cmd=RAXML_BIN("raxmlHPC"), \
        sequences=phy_file, algorithm="a", model="GTRGAMMA", \
        parsimony_seed=12345, rapid_bootstrap_seed=12345, \
        num_replicates=100, name="aln", \
        working_dir=raxml_work, **kwargs)

    logging.debug("Building ML tree using RAxML: %s" % raxml_cl)
    stdout, stderr = raxml_cl()

    tree_file = "{0}/RAxML_bipartitions.aln".format(raxml_work)
    if not op.exists(tree_file):
        print("***RAxML failed.", file=sys.stderr)
        sh("rm -rf %s" % raxml_work, log=False)
        return None
    sh("cp {0} {1}".format(tree_file, outfile), log=False)
github tanghaibao / jcvi / jcvi / apps / grid.py View on Github external
if self.concurrency:
            qsub += " -tc {0}".format(self.concurrency)
        if self.name:
            qsub += ' -N "{0}"'.format(self.name)
        if self.hold_jid:
            param = "-hold_jid_ad" if self.arr else "-hold_jid"
            qsub += " {0} {1}".format(param, self.hold_jid)
        if self.extra:
            qsub += " {0}".format(self.extra)

        # I/O
        infile = self.infile
        outfile = self.outfile
        errfile = self.errfile
        outdir = self.outdir
        mkdir(outdir)
        redirect_same = outfile and (outfile == errfile)

        if infile:
            qsub += " -i {0}".format(infile)
        if outfile:
            self.outfile = op.join(outdir, outfile)
            qsub += " -o {0}".format(self.outfile)
        if errfile:
            if redirect_same:
                qsub += " -j y"
            else:
                self.errfile = op.join(outdir, errfile)
                qsub += " -e {0}".format(self.errfile)

        cmd = " ".join((qsub, self.cmd))
        return cmd
github tanghaibao / jcvi / jcvi / assembly / automaton.py View on Github external
def slink(p, pf, tag, extra=None):

    mkdir(pf, overwrite=True)
    cwd = os.getcwd()
    os.chdir(pf)

    # Create sym-links for the input files
    i = 1
    for f in sorted(p):
        gz = ".gz" if f.endswith(".gz") else ""
        if "PE-0" in f:
            sh("ln -sf ../{0} PE-0.fastq{1}".format(f, gz))
            continue
        for t in tag:
            sh("ln -sf ../{0} {1}.{2}.fastq{3}".format(f, t, i, gz))
        i += 1

    if extra:
        for e in extra: