How to use the jcvi.formats.sizes.Sizes function in jcvi

To help you get started, we’ve selected a few jcvi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / jcvi / assembly / sopra.py View on Github external
p.add_option("--insert", type="int", default=0,
            help="Mean insert size [default: estimate from data]")
    p.add_option("--prefix", default=False, action="store_true",
            help="Only keep links between IDs with same prefix [default: %default]")
    p.add_option("--debug", dest="debug", default=False, action="store_true",
            help="Print verbose info when checking mates [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, fastafile = args
    debug = opts.debug
    cutoff = opts.cutoff

    sizes = Sizes(fastafile)

    cutoffopt = "--cutoff={0}".format(cutoff)
    mateorientationopt = '--mateorientation={0}'.format(opts.mateorientation)
    bedfile, stats = pairs([bedfile, cutoffopt,
                            mateorientationopt, "--rclip={0}".format(opts.rclip)])

    maxcutoff = cutoff or stats.p2
    insert = opts.insert or stats.median
    logging.debug("Mate hangs must be <= {0}, --cutoff to override".\
            format(maxcutoff))

    rs = lambda x: x.accn[:-1]

    fp = open(bedfile)
    linksfile = bedfile.rsplit(".", 1)[0] + ".links"
    fw = open(linksfile, "w")
github tanghaibao / jcvi / jcvi / assembly / patch.py View on Github external
def eject(args):
    """
    %prog eject candidates.bed chr.fasta

    Eject scaffolds from assembly, using the range identified by closest().
    """
    p = OptionParser(eject.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    candidates, chrfasta = args
    sizesfile = Sizes(chrfasta).filename
    cbedfile = complementBed(candidates, sizesfile)

    cbed = Bed(cbedfile)
    for b in cbed:
        b.accn = b.seqid
        b.score = 1000
        b.strand = '+'

    cbed.print_to_file()
github tanghaibao / jcvi / assembly / sopra.py View on Github external
Use --prefix to place the sequences with same prefix together. The final
    product is an AGP file.
    """
    from jcvi.algorithms.graph import nx
    from jcvi.formats.agp import order_to_agp

    p = OptionParser(scaffold.__doc__)
    p.add_option("--prefix", default=False, action="store_true",
            help="Keep IDs with same prefix together [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    ctgfasta, linksfile = args
    sizes = Sizes(ctgfasta).mapping
    logfile = "scaffold.log"
    fwlog = open(logfile, "w")

    pf = ctgfasta.rsplit(".", 1)[0]
    agpfile = pf + ".agp"
    fwagp = open(agpfile, "w")

    g = nx.MultiGraph()  # use this to get connected components

    fp = open(linksfile)
    for row in fp:
        c = LinkLine(row)
        distance = max(c.distance, 50)

        g.add_edge(c.aseqid, c.bseqid,
                orientation=c.orientation, distance=distance)
github tanghaibao / jcvi / jcvi / formats / bed.py View on Github external
def make_bedgraph(bedfile, fastafile):
    sizesfile = Sizes(fastafile).filename
    pf = bedfile.rsplit(".", 1)[0]
    bedfile = sort([bedfile])
    bedgraph = pf + ".bedgraph"
    if need_update(bedfile, bedgraph):
        cmd = "genomeCoverageBed"
        cmd += " -i {0} -g {1} -bga".format(bedfile, sizesfile)
        sh(cmd, outfile=bedgraph)

    return bedgraph
github tanghaibao / jcvi / jcvi / graphics / blastplot.py View on Github external
if len(args) != 1:
        sys.exit(not p.print_help())

    if qbed:
        qsizes = qsizes or sizes([qbed])
        qbed = Bed(qbed)
    if sbed:
        ssizes = ssizes or sizes([sbed])
        sbed = Bed(sbed)

    assert qsizes and ssizes, \
        "You must specify at least one of --sizes of --bed"

    qsizes = Sizes(qsizes, select=opts.qselect)
    ssizes = Sizes(ssizes, select=opts.sselect)

    blastfile, = args

    image_name = op.splitext(blastfile)[0] + "." + opts.format
    plt.rcParams["xtick.major.pad"] = 16
    plt.rcParams["ytick.major.pad"] = 16

    # Fix the width
    xsize, ysize = qsizes.totalsize, ssizes.totalsize

    # get highlight beds
    qh, sh = opts.qh, opts.sh
    qh = Bed(qh) if qh else None
    sh = Bed(sh) if sh else None
    highlights = (qh, sh) if qh or sh else None
github tanghaibao / jcvi / jcvi / annotation / depth.py View on Github external
help="Minimum read depth to report intervals [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    binfile, fastafile = args
    fw = must_open(opts.output, "w")
    cutoff = opts.cutoff
    assert cutoff >= 0, "Need non-negative cutoff"

    b = BinFile(binfile)
    ar = b.array

    fastasize, sizes, offsets = get_offsets(fastafile)
    s = Sizes(fastafile)
    for ctg, ctglen in s.iter_sizes():
        offset = offsets[ctg]
        subarray = ar[offset:offset + ctglen]
        key = lambda x: x[1] >= cutoff
        for tf, array_elements in groupby(enumerate(subarray), key=key):
            array_elements = list(array_elements)
            if not tf:
                continue

            # 0-based system => 1-based system
            start = array_elements[0][0] + 1
            end = array_elements[-1][0] + 1

            mean_depth = sum([x[1] for x in array_elements]) / \
                len(array_elements)
            mean_depth = int(mean_depth)
github tanghaibao / jcvi / jcvi / graphics / mummerplot.py View on Github external
help="Color the dots based on")
    p.add_option("--nolayout", default=False, action="store_true",
                 help="Do not rearrange contigs")
    p.set_align(pctid=0, hitlen=0)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    deltafile, = args
    reffasta, queryfasta = open(deltafile).readline().split()
    color = opts.color
    layout = not opts.nolayout
    prefix = op.basename(deltafile).split(".")[0]
    qsizes = Sizes(queryfasta).mapping
    rsizes = Sizes(reffasta).mapping

    refs = SetFile(opts.refids) if opts.refids else set(rsizes.keys())
    refcov = opts.refcov
    pctid = opts.pctid
    hitlen = opts.hitlen
    deltafile = filter([deltafile, "--pctid={0}".format(pctid),
                        "--hitlen={0}".format(hitlen)])

    if opts.all:
        for r in refs:
            pdffile = plot_some_queries([r], qsizes, rsizes, deltafile, refcov,
                                        prefix=prefix, color=color,
                                        layout=layout)
            if pdffile:
                sh("mv {0} {1}.pdf".format(pdffile, r))
    else:
github tanghaibao / jcvi / jcvi / formats / vcf.py View on Github external
goodsnpcounts[ctg] += 1
        # Tabulate all possible combinations
        intra = ref + "-" + intra
        inter = alt + "-" + inter
        combinations[(intra, inter)] += 1

        if bedfw:
            print("\t".join(str(x) for x in \
                        (ctg, pos - 1, pos, locus)), file=bedfw)

    if bedfw:
        logging.debug("SNP locations written to `{0}`.".format(opts.bed))
        bedfw.close()

    nsites = sum(len(x) for x in snps.values())
    sizes = Sizes(fastafile)
    bpsize = sizes.totalsize
    snprate = lambda a: a * 1000. / bpsize
    m = "Dataset `{0}` contains {1} contigs ({2} bp).\n".\
                format(fastafile, len(sizes), thousands(bpsize))
    m += "A total of {0} SNPs within {1} contigs ({2} bp).\n".\
                format(nsites, len(snps),
                       thousands(sum(sizes.mapping[x] for x in snps.keys())))
    m += "SNP rate: {0:.1f}/Kb, ".format(snprate(nsites))
    m += "IntraSNPs: {0} ({1:.1f}/Kb), InterSNPs: {2} ({3:.1f}/Kb)".\
                format(intraSNPs, snprate(intraSNPs), interSNPs, snprate(interSNPs))
    print(m, file=sys.stderr)
    print(tabulate(combinations), file=sys.stderr)

    leg = "Legend: A - homozygous same, B - homozygous different, X - heterozygous"
    print(leg, file=sys.stderr)
github tanghaibao / jcvi / jcvi / assembly / allmaps.py View on Github external
def write_unplaced_agp(agpfile, scaffolds, unplaced_agp):
    agp = AGP(agpfile)
    scaffolds_seen = set(x.component_id for x in agp)
    sizes = Sizes(scaffolds).mapping
    fwagp = must_open(unplaced_agp, "w")
    for s in natsorted(sizes.keys()):
        if s in scaffolds_seen:
            continue
        order_to_agp(s, [(s, "?")], sizes, fwagp)
    logging.debug("Write unplaced AGP to `{0}`.".format(unplaced_agp))