How to use the jcvi.apps.base.need_update function in jcvi

To help you get started, we’ve selected a few jcvi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / jcvi / jcvi / assembly / ca.py View on Github external
def parse_ctgs(bestedges, frgtoctg):
    cache = "frgtoctg.cache"
    if need_update(frgtoctg, cache):
        reads_to_ctgs = {}
        frgtodeg = frgtoctg.replace(".frgctg", ".frgdeg")
        iidtouid = frgtoctg.replace(".posmap.frgctg", ".iidtouid")
        fp = open(iidtouid)
        frgstore = {}
        for row in fp:
            tag, iid, uid = row.split()
            if tag == "FRG":
                frgstore[uid] = int(iid)

        for pf, f in zip(("ctg", "deg"), (frgtoctg, frgtodeg)):
            fp = open(f)
            logging.debug("Parse posmap file `{0}`".format(f))
            for row in fp:
                frg, ctg = row.split()[:2]
                frg = frgstore[frg]
github tanghaibao / jcvi / jcvi / formats / bed.py View on Github external
opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, fastafile = args
    mindepth = opts.mindepth
    bedgraph = make_bedgraph(bedfile)

    bedgraphfiltered = bedgraph + ".d{0}".format(mindepth)
    if need_update(bedgraph, bedgraphfiltered):
        filter([bedgraph, "--minaccn={0}".format(mindepth),
                "--outfile={0}".format(bedgraphfiltered)])

    merged = bedgraphfiltered + ".merge.fasta"
    if need_update(bedgraphfiltered, merged):
        mergeBed(bedgraphfiltered, sorted=True)
github tanghaibao / jcvi / jcvi / assembly / ca.py View on Github external
plate = op.basename(fastafile).split(".")[0]

    mated = (opts.size != 0)
    mean, sv = get_mean_sv(opts.size)

    if mated:
        libname = "Sanger{0}Kb-".format(opts.size / 1000) + plate
    else:
        libname = plate

    frgfile = libname + ".frg"

    if opts.clean:
        cleanfasta = fastafile.rsplit(".", 1)[0] + ".clean.fasta"
        if need_update(fastafile, cleanfasta):
            clean([fastafile, "--canonical", "-o", cleanfasta])
        fastafile = cleanfasta

    if mated:
        qualfile = make_qual(fastafile, score=21)
        if opts.matefile:
            matefile = opts.matefile
            assert op.exists(matefile)
        else:
            matefile = make_matepairs(fastafile)

        cmd = "convert-fasta-to-v2.pl"
        cmd += " -l {0} -s {1} -q {2} ".format(libname, fastafile, qualfile)
        if mated:
            cmd += "-mean {0} -stddev {1} -m {2} ".format(mean, sv, matefile)
github tanghaibao / jcvi / jcvi / assembly / gaps.py View on Github external
"""
    from jcvi.formats.base import DictFile
    from jcvi.apps.align import blast

    p = OptionParser(sizes.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    gapsbed, afasta, bfasta = args
    pf = gapsbed.rsplit(".", 1)[0]
    extbed = pf + ".ext.bed"
    extfasta = pf + ".ext.fasta"

    if need_update(gapsbed, extfasta):
        extbed, extfasta = flanks([gapsbed, afasta])

    q = op.basename(extfasta).split(".")[0]
    r = op.basename(bfasta).split(".")[0]
    blastfile = "{0}.{1}.blast".format(q, r)

    if need_update([extfasta, bfasta], blastfile):
        blastfile = blast([bfasta, extfasta, "--wordsize=50", "--pctid=98"])

    labelsfile = blast_to_twobeds(blastfile)
    labels = DictFile(labelsfile, delimiter='\t')
    bed = Bed(gapsbed)
    for b in bed:
        b.score = b.span
        accn = b.accn
        print("\t".join((str(x) for x in (b.seqid, b.start - 1, b.end, accn,
github tanghaibao / jcvi / jcvi / projects / tgbs.py View on Github external
p.set_home("cdhit", default="/usr/local/bin/")
    p.set_home("fiona", default="/usr/local/bin/")
    p.set_home("jellyfish", default="/usr/local/bin/")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    cpus = opts.cpus
    depth = opts.depth
    pf, sf = fastqfile.rsplit(".", 1)

    diginormfile = pf + ".diginorm." + sf
    if need_update(fastqfile, diginormfile):
        diginorm([fastqfile, "--single", "--depth={0}".format(depth)])
        keepabund = fastqfile + ".keep.abundfilt"
        sh("cp -s {0} {1}".format(keepabund, diginormfile))

    jf = pf + "-K23.histogram"
    if need_update(diginormfile, jf):
        jellyfish([diginormfile, "--prefix={0}".format(pf),
                   "--cpus={0}".format(cpus),
                   "--jellyfish_home={0}".format(opts.jellyfish_home)])

    genomesize = histogram([jf, pf, "23"])
    fiona = pf + ".fiona.fa"
    if need_update(diginormfile, fiona):
        cmd = op.join(opts.fiona_home, "fiona")
        cmd += " -g {0} -nt {1} --sequencing-technology {2}".\
            format(genomesize, cpus, opts.technology)
github tanghaibao / jcvi / jcvi / apps / bwa.py View on Github external
def check_aln(dbfile, readfile, cpus=32):
    from jcvi.formats.fastq import guessoffset

    saifile = readfile.rsplit(".", 1)[0] + ".sai"
    if need_update((dbfile, readfile), saifile):
        offset = guessoffset([readfile])
        cmd = "bwa aln " + " ".join((dbfile, readfile))
        cmd += " -t {0}".format(cpus)
        if offset == 64:
            cmd += " -I"
        sh(cmd, outfile=saifile)
    else:
        logging.error("`{0}` exists. `bwa aln` already run.".format(saifile))

    return saifile
github tanghaibao / jcvi / jcvi / formats / bed.py View on Github external
def rmdup_bedpe(filtered, rmdup, dupwiggle=10):
    sortedfiltered = filtered + ".sorted"
    if need_update(filtered, sortedfiltered):
        sh("sort -k1,1 -k2,2n -i {0} -o {1}".format(filtered, sortedfiltered))

    logging.debug("Rmdup criteria: wiggle <= {0}".format(dupwiggle))
    fp = must_open(sortedfiltered)
    fw = must_open(rmdup, "w")
    data = [BedpeLine(x) for x in fp]
    retained = total = 0
    for seqid, ss in groupby(data, key=lambda x: x.seqid1):
        ss = list(ss)
        for i, a in enumerate(ss):
            if a.isdup:
                continue
            for b in ss[i + 1:]:
                if b.start1 > a.start1 + dupwiggle:
                    break
                if b.isdup:
github tanghaibao / jcvi / jcvi / formats / sam.py View on Github external
cmd += " -@ {0}".format(cpus)
    if opts.unique:
        cmd += " -q 1"

    if samfile.endswith(".sam") and need_update(samfile, bamfile):
        sh(cmd)

    # Already sorted?
    if bamfile.endswith(".sorted.bam"):
        sortedbamfile = bamfile
    else:
        prefix = bamfile.replace(".bam", "")
        sortedbamfile = prefix + ".sorted.bam"

    if need_update(bamfile, sortedbamfile):
        cmd = "samtools sort {0} -o {1}".format(bamfile, sortedbamfile)
        cmd += " -@ {0}".format(cpus)
        sh(cmd)

    baifile = sortedbamfile + ".bai"
    if need_update(sortedbamfile, baifile):
        sh("samtools index {0}".format(sortedbamfile))

    return sortedbamfile
github tanghaibao / jcvi / jcvi / projects / ies.py View on Github external
ies_name = "{0:05d}-r{1}".format(ies_id, count)
            if count < opts.mindepth:
                continue
            print("\t".join(str(x) for x in \
                            (seqid, start - 1, end, ies_name)), file=fw)
            ies_id += 1
        fw.close()
        sort([countbedfile, "-i", sort_tmpdir])

    # Remove deletions that contain some read depth
    depthbedfile = pf + ".depth.bed"
    if need_update((sortedbedfile, countbedfile), depthbedfile):
        depth([sortedbedfile, countbedfile, "--outfile={0}".format(depthbedfile)])

    validbedfile = pf + ".valid.bed"
    if need_update(depthbedfile, validbedfile):
        fw = open(validbedfile, "w")
        logging.debug("Filter valid deletions to `{0}`.".format(validbedfile))
        bed = Bed(depthbedfile)
        all_scores = [float(b.score) for b in bed]
        lb, ub = outlier_cutoff(all_scores)
        logging.debug("Bounds for depths: LB={0:.2f} (ignored)  UB={1:.2f}".format(lb, ub))
        for b in bed:
            if float(b.score) > ub:
                continue
            print(b, file=fw)
        fw.close()

    # Remove deletions that contain sequencing gaps on its flanks
    selectedbedfile = pf + ".selected.bed"
    if need_update(validbedfile, selectedbedfile):
        flanksbedfile = pf + ".flanks.bed"
github tanghaibao / jcvi / jcvi / assembly / coverage.py View on Github external
def __init__(self, bedfile, sizesfile):

        bedfile = sort([bedfile])
        coveragefile = bedfile + ".coverage"
        if need_update(bedfile, coveragefile):
            cmd = "genomeCoverageBed"
            cmd += " -bg -i {0} -g {1}".format(bedfile, sizesfile)
            sh(cmd, outfile=coveragefile)

        self.sizes = Sizes(sizesfile).mapping

        filename = coveragefile
        assert filename.endswith(".coverage")
        super(Coverage, self).__init__(filename)