Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def stats(args):
"""
%prog stats agpfile
Print out a report for length of gaps and components.
"""
from jcvi.utils.table import tabulate
p = OptionParser(stats.__doc__)
p.add_option("--warn", default=False, action="store_true",
help="Warnings on small component spans [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(p.print_help())
agpfile, = args
agp = AGP(agpfile)
gap_lengths = []
component_lengths = []
for a in agp:
span = a.object_span
if a.is_gap:
label = a.gap_type
def bed(args):
"""
%prog bed anchorsfile
Convert ANCHORS file to BED format.
"""
from collections import defaultdict
from jcvi.compara.synteny import AnchorFile, check_beds
from jcvi.formats.bed import Bed
from jcvi.formats.base import get_number
p = OptionParser(bed.__doc__)
p.add_option("--switch", default=False, action="store_true",
help="Switch reference and aligned map elements")
p.add_option("--scale", type="float",
help="Scale the aligned map distance by factor")
p.set_beds()
p.set_outfile()
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
anchorsfile, = args
switch = opts.switch
scale = opts.scale
ac = AnchorFile(anchorsfile)
pairs = defaultdict(list)
def csv(args):
"""
%prog csv excelfile
Convert EXCEL to csv file.
"""
from xlrd import open_workbook
p = OptionParser(csv.__doc__)
p.set_sep(sep=',')
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
excelfile, = args
sep = opts.sep
csvfile = excelfile.rsplit(".", 1)[0] + ".csv"
wb = open_workbook(excelfile)
fw = open(csvfile, "w")
for s in wb.sheets():
print('Sheet:',s.name, file=sys.stderr)
for row in range(s.nrows):
values = []
for col in range(s.ncols):
def hetsmooth(args):
"""
%prog hetsmooth reads_1.fq reads_2.fq jf-23_0
Wrapper against het-smooth. Below is the command used in het-smooth manual.
$ het-smooth --kmer-len=23 --bottom-threshold=38 --top-threshold=220
--no-multibase-replacements --jellyfish-hash-file=23-mers.jf
reads_1.fq reads_2.fq
"""
p = OptionParser(hetsmooth.__doc__)
p.add_option("-K", default=23, type="int",
help="K-mer size [default: %default]")
p.add_option("-L", type="int",
help="Bottom threshold, first min [default: %default]")
p.add_option("-U", type="int",
help="Top threshold, second min [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 3:
sys.exit(not p.print_help())
reads1fq, reads2fq, jfdb = args
K = opts.K
L = opts.L
U = opts.U
def coverage(args):
"""
%prog coverage coordsfile
Report the coverage per query record, useful to see which query matches
reference. The coords file MUST be filtered with supermap::
jcvi.algorithms.supermap --filter query
"""
p = OptionParser(coverage.__doc__)
p.add_option("-c", dest="cutoff", default=0.5, type="float",
help="only report query with coverage greater than [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
coordsfile, = args
fp = open(coordsfile)
coords = []
for row in fp:
try:
c = CoordsLine(row)
except AssertionError:
def bam(args):
"""
%prog snp input.gsnap ref.fasta
Convert GSNAP output to BAM.
"""
from jcvi.formats.sizes import Sizes
from jcvi.formats.sam import index
p = OptionParser(bam.__doc__)
p.set_home("eddyyeh")
p.set_cpus()
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
gsnapfile, fastafile = args
EYHOME = opts.eddyyeh_home
pf = gsnapfile.rsplit(".", 1)[0]
uniqsam = pf + ".unique.sam"
samstats = uniqsam + ".stats"
sizesfile = Sizes(fastafile).filename
if need_update((gsnapfile, sizesfile), samstats):
cmd = op.join(EYHOME, "gsnap2gff3.pl")
cmd += " --format sam -i {0} -o {1}".format(gsnapfile, uniqsam)
def bin(args):
"""
%prog bin filename filename.bin
Serialize counts to bitarrays.
"""
from bitarray import bitarray
p = OptionParser(bin.__doc__)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
inp, outp = args
fp = must_open(inp)
fw = must_open(outp, "w")
a = bitarray()
for row in fp:
c = row.split()[-1]
a.append(int(c))
a.tofile(fw)
fw.close()
def density(args):
"""
%prog density bedfile ref.fasta
Calculates density of features per seqid.
"""
p = OptionParser(density.__doc__)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
bedfile, fastafile = args
bed = Bed(bedfile)
sizes = Sizes(fastafile).mapping
header = "seqid features size density_per_Mb".split()
print("\t".join(header))
for seqid, bb in bed.sub_beds():
nfeats = len(bb)
size = sizes[seqid]
ds = nfeats * 1e6 / size
print("\t".join(str(x) for x in \
(seqid, nfeats, size, "{0:.1f}".format(ds))))
def bundle(args):
"""
%prog bundle linkfiles
Bundle contig links into high confidence contig edges. This is useful to
combine multiple linkfiles (from different libraries).
"""
import numpy as np
p = OptionParser(bundle.__doc__)
p.add_option("--links", type="int", default=1,
help="Minimum number of mate pairs to bundle [default: %default]")
opts, args = p.parse_args(args)
if len(args) < 1:
sys.exit(not p.print_help())
fp = must_open(args)
contigGraph = defaultdict(list)
for row in fp:
c = LinkLine(row)
contigGraph[(c.aseqid, c.bseqid)].append((c.orientation, c.distance))
for (aseqid, bseqid), distances in contigGraph.items():
# For the same pair of contigs, their might be conflicting orientations
# or distances. Only keep the one with the most pairs.
def main():
"""
%prog numbers1.txt number2.txt ...
Print histogram of the data files. The data files contain one number per
line. If more than one file is inputted, the program will combine the
histograms into the same plot.
"""
allowed_format = ("emf", "eps", "pdf", "png", "ps", \
"raw", "rgba", "svg", "svgz")
p = OptionParser(main.__doc__)
p.add_option("--skip", default=0, type="int",
help="skip the first several lines [default: %default]")
p.add_option("--col", default=0, type="int", help="Get the n-th column")
p.set_histogram()
p.add_option("--tags", dest="tags", default=None,
help="tags for data if multiple input files, comma sep")
p.add_option("--ascii", default=False, action="store_true",
help="print ASCII text stem-leaf plot [default: %default]")
p.add_option("--base", default="0", choices=("0", "2", "10"),
help="use logarithm axis with base, 0 to disable [default: %default]")
p.add_option("--facet", default=False, action="store_true",
help="place multiple histograms side-by-side [default: %default]")
p.add_option("--fill", default="white",
help="color of the bin [default: %default]")
p.add_option("--format", default="pdf", choices=allowed_format,
help="Generate image of format [default: %default]")