How to use the jcvi.formats.bed.Bed function in jcvi

To help you get started, we’ve selected a few jcvi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / jcvi / jcvi / assembly / patch.py View on Github external
seen.add(gapid)
            gi, gb = gorder[gapid]
            gpbed.append(gb)
            gappositions[(gb.seqid, gb.start, gb.end)] = gapid
        gap_to_scf[gapid].append((unplaced, strand))

    gpbedfile = "candidate.gaps.bed"
    gpbed.print_to_file(gpbedfile, sorted=True)

    agpfile = agp([chrfasta])
    maskedagpfile = mask([agpfile, gpbedfile])
    maskedbedfile = maskedagpfile.rsplit(".", 1)[0] + ".bed"
    bed([maskedagpfile, "--outfile={0}".format(maskedbedfile)])

    mbed = Bed(maskedbedfile)
    finalbed = Bed()
    for b in mbed:
        sid = b.seqid
        key = (sid, b.start, b.end)
        if key not in gappositions:
            finalbed.add("{0}\n".format(b))
            continue

        gapid = gappositions[key]
        scfs = gap_to_scf[gapid]

        # For scaffolds placed in the same gap, sort according to positions
        scfs.sort(key=lambda x: corder[x[0]][1].start + corder[x[0]][1].end)
        for scf, strand in scfs:
            size = sizes[scf]
            finalbed.add("\t".join(str(x) for x in \
                    (scf, 0, size, sid, 1000, strand)))
github tanghaibao / jcvi / jcvi / compara / pad.py View on Github external
sr = Range("0", mins, maxs, maxs - mins, id)
        qranges.append(qr)
        sranges.append(sr)

    qpads = list(get_segments(qranges, qextra))
    spads = list(get_segments(sranges, sextra))

    suffix = ".pad.bed"
    qpf = opts.qbed.split(".")[0]
    spf = opts.sbed.split(".")[0]
    qpadfile = qpf + suffix
    spadfile = spf + suffix
    qnpads, qpadnames = write_PAD_bed(qpadfile, qpf, qpads, qbed)
    snpads, spadnames = write_PAD_bed(spadfile, spf, spads, sbed)

    qpadbed, spadbed = Bed(qpadfile), Bed(spadfile)

    logmp = make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames)
    m, n = logmp.shape

    matrixfile = ".".join((qpf, spf, "logmp.txt"))
    fw = open(matrixfile, "w")
    header = ["o"] + spadnames
    print("\t".join(header), file=fw)
    for i in xrange(m):
        row = [qpadnames[i]] + ["{0:.1f}".format(x) for x in logmp[i, :]]
        print("\t".join(row), file=fw)

    fw.close()

    # Run CLUSTER 3.0 (Pearson correlation, average linkage)
    cmd = op.join(opts.path, "cluster")
github tanghaibao / jcvi / assembly / scaffoldQC.py View on Github external
the plots are one contig/scaffold per plot.
    """
    from jcvi.graphics.base import set_image_options
    from jcvi.utils.iter import grouper

    p = OptionParser(plot.__doc__)
    p.add_option("--cutoff", type="int", default=1000000,
            help="Plot scaffolds with size larger than [default: %default]")
    opts, args, iopts = set_image_options(p, args, figsize="14x8", dpi=150)

    if len(args) < 4 or len(args) % 3 != 1:
        sys.exit(not p.print_help())

    scafsizes = Sizes(args[0])
    trios = list(grouper(3, args[1:]))
    trios = [(a, Sizes(b), Bed(c)) for a, b, c in trios]

    for scaffoldID, scafsize in scafsizes.iter_sizes():
        if scafsize < opts.cutoff:
            continue
        logging.debug("Loading {0} (size={1})".format(scaffoldID,
            thousands(scafsize)))

        tmpname = scaffoldID + ".sizes"
        tmp = open(tmpname, "w")
        tmp.write("{0}\t{1}".format(scaffoldID, scafsize))
        tmp.close()

        tmpsizes = Sizes(tmpname)
        tmpsizes.close(clean=True)

        imagename = ".".join((scaffoldID, opts.format))
github tanghaibao / jcvi / jcvi / assembly / patch.py View on Github external
p.add_option("--maxsize", default=300000, type="int",
            help="Maximum size of patchers to be replaced [default: %default]")
    p.add_option("--prefix", help="Prefix of the new object [default: %default]")
    p.add_option("--strict", default=False, action="store_true",
            help="Only update if replacement has no gaps [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    pbed, pfasta, bbfasta, altfasta = args
    maxsize = opts.maxsize  # Max DNA size to replace gap
    rclip = opts.rclip

    blastfile = blast([altfasta, pfasta,"--wordsize=100", "--pctid=99"])
    order = Bed(pbed).order
    beforebed, afterbed = blast_to_twobeds(blastfile, order, rclip=rclip,
                                           maxsize=maxsize)

    beforefasta = fastaFromBed(beforebed, bbfasta, name=True, stranded=True)
    afterfasta = fastaFromBed(afterbed, altfasta, name=True, stranded=True)

    # Exclude the replacements that contain more Ns than before
    ah = SeqIO.parse(beforefasta, "fasta")
    bh = SeqIO.parse(afterfasta, "fasta")
    count_Ns = lambda x: x.seq.count('n') + x.seq.count('N')
    exclude = set()
    for arec, brec in zip(ah, bh):
        an = count_Ns(arec)
        bn = count_Ns(brec)
        if opts.strict:
            if bn == 0:
github tanghaibao / jcvi / jcvi / graphics / blastplot.py View on Github external
qsizes = Sizes(qsizes, select=opts.qselect)
    ssizes = Sizes(ssizes, select=opts.sselect)

    blastfile, = args

    image_name = op.splitext(blastfile)[0] + "." + opts.format
    plt.rcParams["xtick.major.pad"] = 16
    plt.rcParams["ytick.major.pad"] = 16

    # Fix the width
    xsize, ysize = qsizes.totalsize, ssizes.totalsize

    # get highlight beds
    qh, sh = opts.qh, opts.sh
    qh = Bed(qh) if qh else None
    sh = Bed(sh) if sh else None
    highlights = (qh, sh) if qh or sh else None

    ratio = ysize * 1. / xsize if proportional else 1
    width = iopts.w
    height = iopts.h * ratio
    fig = plt.figure(1, (width, height))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed,
            style=opts.dotstyle, proportional=proportional, sampleN=opts.nmax,
            baseticks=True, stripNames=opts.stripNames, highlights=highlights)

    # add genome names
    to_ax_label = lambda fname: op.basename(fname).split(".")[0]
github tanghaibao / jcvi / jcvi / formats / bed.py View on Github external
def intersectBed_wao(abedfile, bbedfile, minOverlap=0):
    abed = Bed(abedfile)
    bbed = Bed(bbedfile)
    print("`{0}` has {1} features.".format(abedfile, len(abed)), file=sys.stderr)
    print("`{0}` has {1} features.".format(bbedfile, len(bbed)), file=sys.stderr)

    cmd = "intersectBed -wao -a {0} -b {1}".format(abedfile, bbedfile)
    acols = abed[0].nargs
    bcols = bbed[0].nargs
    fp = popen(cmd)
    for row in fp:
        atoms = row.split()
        aline = "\t".join(atoms[:acols])
        bline = "\t".join(atoms[acols:acols + bcols])
        c = int(atoms[-1])
        if c < minOverlap:
            continue
        a = BedLine(aline)
        try:
github tanghaibao / jcvi / jcvi / graphics / blastplot.py View on Github external
help="Only plot maximum of N dots [default: %default]")
    opts, args, iopts = p.set_image_options(figsize="8x8", style="dark", dpi=150)

    qsizes, ssizes = opts.qsizes, opts.ssizes
    qbed, sbed = opts.qbed, opts.sbed
    proportional = opts.proportional

    if len(args) != 1:
        sys.exit(not p.print_help())

    if qbed:
        qsizes = qsizes or sizes([qbed])
        qbed = Bed(qbed)
    if sbed:
        ssizes = ssizes or sizes([sbed])
        sbed = Bed(sbed)

    assert qsizes and ssizes, \
        "You must specify at least one of --sizes of --bed"

    qsizes = Sizes(qsizes, select=opts.qselect)
    ssizes = Sizes(ssizes, select=opts.sselect)

    blastfile, = args

    image_name = op.splitext(blastfile)[0] + "." + opts.format
    plt.rcParams["xtick.major.pad"] = 16
    plt.rcParams["ytick.major.pad"] = 16

    # Fix the width
    xsize, ysize = qsizes.totalsize, ssizes.totalsize
github tanghaibao / jcvi / jcvi / formats / bed.py View on Github external
p = OptionParser(random.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, N = args
    assert is_number(N)

    b = Bed(bedfile)
    NN = flexible_cast(N)
    if NN < 1:
        NN = int(round(NN * len(b)))

    beds = sample(b, NN)
    new_bed = Bed()
    new_bed.extend(beds)

    outfile = bedfile.rsplit(".", 1)[0] + ".{0}.bed".format(N)
    new_bed.print_to_file(outfile)
    logging.debug("Write {0} features to `{1}`".format(NN, outfile))
github tanghaibao / jcvi / jcvi / assembly / patch.py View on Github external
nogapsfw.close()
    largestgapsfw.close()
    beds = [largestgapsbed]
    toclean = [nogapsbed, largestgapsbed]

    if opts.closest:
        closestgapsbed = pf + ".closestgaps.bed"
        cmd = "closestBed -a {0} -b {1} -d".format(nogapsbed, gapsbed)
        sh(cmd, outfile=closestgapsbed)
        beds += [closestgapsbed]
        toclean += [closestgapsbed]
    else:
        pointbed = pf + ".point.bed"
        pbed = Bed()
        bed = Bed(nogapsbed)
        for b in bed:
            pos = (b.start + b.end) / 2
            b.start, b.end = pos, pos
            pbed.append(b)
        pbed.print_to_file(pointbed)
        beds += [pointbed]
        toclean += [pointbed]

    refinedbed = pf + ".refined.bed"
    FileMerger(beds, outfile=refinedbed).merge()

    # Clean-up
    FileShredder(toclean)

    return refinedbed
github tanghaibao / jcvi / jcvi / assembly / patch.py View on Github external
exclude.add(id)

    logging.debug("Ignore {0} updates because of decreasing quality."\
                    .format(len(exclude)))


    abed = Bed(beforebed, sorted=False)
    bbed = Bed(afterbed, sorted=False)
    abed = [x for x in abed if x.accn not in exclude]
    bbed = [x for x in bbed if x.accn not in exclude]

    abedfile = "before.filtered.bed"
    bbedfile = "after.filtered.bed"
    afbed = Bed()
    afbed.extend(abed)
    bfbed = Bed()
    bfbed.extend(bbed)

    afbed.print_to_file(abedfile)
    bfbed.print_to_file(bbedfile)

    shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix)