How to use jcvi - 10 common examples

To help you get started, we’ve selected a few jcvi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / jcvi / jcvi / formats / agp.py View on Github external
def stats(args):
    """
    %prog stats agpfile

    Print out a report for length of gaps and components.
    """
    from jcvi.utils.table import tabulate

    p = OptionParser(stats.__doc__)
    p.add_option("--warn", default=False, action="store_true",
                 help="Warnings on small component spans [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(p.print_help())

    agpfile, = args

    agp = AGP(agpfile)
    gap_lengths = []
    component_lengths = []
    for a in agp:
        span = a.object_span
        if a.is_gap:
            label = a.gap_type
github tanghaibao / jcvi / jcvi / assembly / syntenypath.py View on Github external
def bed(args):
    """
    %prog bed anchorsfile

    Convert ANCHORS file to BED format.
    """
    from collections import defaultdict
    from jcvi.compara.synteny import AnchorFile, check_beds
    from jcvi.formats.bed import Bed
    from jcvi.formats.base import get_number

    p = OptionParser(bed.__doc__)
    p.add_option("--switch", default=False, action="store_true",
                 help="Switch reference and aligned map elements")
    p.add_option("--scale", type="float",
                 help="Scale the aligned map distance by factor")
    p.set_beds()
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    anchorsfile, = args
    switch = opts.switch
    scale = opts.scale
    ac = AnchorFile(anchorsfile)
    pairs = defaultdict(list)
github tanghaibao / jcvi / jcvi / formats / excel.py View on Github external
def csv(args):
    """
    %prog csv excelfile

    Convert EXCEL to csv file.
    """
    from xlrd import open_workbook

    p = OptionParser(csv.__doc__)
    p.set_sep(sep=',')
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    excelfile, = args
    sep = opts.sep
    csvfile = excelfile.rsplit(".", 1)[0] + ".csv"
    wb = open_workbook(excelfile)
    fw = open(csvfile, "w")
    for s in wb.sheets():
        print('Sheet:',s.name, file=sys.stderr)
        for row in range(s.nrows):
            values = []
            for col in range(s.ncols):
github tanghaibao / jcvi / assembly / preprocess.py View on Github external
def hetsmooth(args):
    """
    %prog hetsmooth reads_1.fq reads_2.fq jf-23_0

    Wrapper against het-smooth. Below is the command used in het-smooth manual.

    $ het-smooth --kmer-len=23 --bottom-threshold=38 --top-threshold=220
           --no-multibase-replacements --jellyfish-hash-file=23-mers.jf
               reads_1.fq reads_2.fq
    """
    p = OptionParser(hetsmooth.__doc__)
    p.add_option("-K", default=23, type="int",
                 help="K-mer size [default: %default]")
    p.add_option("-L", type="int",
                 help="Bottom threshold, first min [default: %default]")
    p.add_option("-U", type="int",
                 help="Top threshold, second min [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    reads1fq, reads2fq, jfdb = args
    K = opts.K
    L = opts.L
    U = opts.U
github tanghaibao / jcvi / jcvi / formats / coords.py View on Github external
def coverage(args):
    """
    %prog coverage coordsfile

    Report the coverage per query record, useful to see which query matches
    reference.  The coords file MUST be filtered with supermap::

    jcvi.algorithms.supermap --filter query
    """
    p = OptionParser(coverage.__doc__)
    p.add_option("-c", dest="cutoff", default=0.5, type="float",
            help="only report query with coverage greater than [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    coordsfile, = args
    fp = open(coordsfile)

    coords = []
    for row in fp:
        try:
            c = CoordsLine(row)
        except AssertionError:
github tanghaibao / jcvi / jcvi / apps / gmap.py View on Github external
def bam(args):
    """
    %prog snp input.gsnap ref.fasta

    Convert GSNAP output to BAM.
    """
    from jcvi.formats.sizes import Sizes
    from jcvi.formats.sam import index

    p = OptionParser(bam.__doc__)
    p.set_home("eddyyeh")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gsnapfile, fastafile = args
    EYHOME = opts.eddyyeh_home
    pf = gsnapfile.rsplit(".", 1)[0]
    uniqsam = pf + ".unique.sam"
    samstats = uniqsam + ".stats"
    sizesfile = Sizes(fastafile).filename
    if need_update((gsnapfile, sizesfile), samstats):
        cmd = op.join(EYHOME, "gsnap2gff3.pl")
        cmd += " --format sam -i {0} -o {1}".format(gsnapfile, uniqsam)
github tanghaibao / jcvi / jcvi / assembly / kmer.py View on Github external
def bin(args):
    """
    %prog bin filename filename.bin

    Serialize counts to bitarrays.
    """
    from bitarray import bitarray
    p = OptionParser(bin.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    inp, outp = args
    fp = must_open(inp)
    fw = must_open(outp, "w")
    a = bitarray()
    for row in fp:
        c = row.split()[-1]
        a.append(int(c))
    a.tofile(fw)
    fw.close()
github tanghaibao / jcvi / jcvi / formats / bed.py View on Github external
def density(args):
    """
    %prog density bedfile ref.fasta

    Calculates density of features per seqid.
    """
    p = OptionParser(density.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, fastafile = args
    bed = Bed(bedfile)
    sizes = Sizes(fastafile).mapping
    header = "seqid features size density_per_Mb".split()
    print("\t".join(header))
    for seqid, bb in bed.sub_beds():
        nfeats = len(bb)
        size = sizes[seqid]
        ds = nfeats * 1e6 / size
        print("\t".join(str(x) for x in \
                    (seqid, nfeats, size, "{0:.1f}".format(ds))))
github tanghaibao / jcvi / assembly / sopra.py View on Github external
def bundle(args):
    """
    %prog bundle linkfiles

    Bundle contig links into high confidence contig edges. This is useful to
    combine multiple linkfiles (from different libraries).
    """
    import numpy as np

    p = OptionParser(bundle.__doc__)
    p.add_option("--links", type="int", default=1,
            help="Minimum number of mate pairs to bundle [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fp = must_open(args)
    contigGraph = defaultdict(list)
    for row in fp:
        c = LinkLine(row)
        contigGraph[(c.aseqid, c.bseqid)].append((c.orientation, c.distance))

    for (aseqid, bseqid), distances in contigGraph.items():
        # For the same pair of contigs, their might be conflicting orientations
        # or distances. Only keep the one with the most pairs.
github tanghaibao / jcvi / assembly / preprocess.py View on Github external
@depends
def run_FastbAndQualb2Fastq(infile=None, outfile=None, rc=False):
    corr = op.basename(infile).rsplit(".", 1)[0]
    cmd = "FastbQualbToFastq HEAD_IN={0} HEAD_OUT={0}".format(corr)
    cmd += " PAIRED=False PHRED_OFFSET=33"
    if rc:
        cmd += " FLIP=True"
    sh(cmd)