Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
p.add_option("--insert", type="int", default=0,
help="Mean insert size [default: estimate from data]")
p.add_option("--prefix", default=False, action="store_true",
help="Only keep links between IDs with same prefix [default: %default]")
p.add_option("--debug", dest="debug", default=False, action="store_true",
help="Print verbose info when checking mates [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
bedfile, fastafile = args
debug = opts.debug
cutoff = opts.cutoff
sizes = Sizes(fastafile)
cutoffopt = "--cutoff={0}".format(cutoff)
mateorientationopt = '--mateorientation={0}'.format(opts.mateorientation)
bedfile, stats = pairs([bedfile, cutoffopt,
mateorientationopt, "--rclip={0}".format(opts.rclip)])
maxcutoff = cutoff or stats.p2
insert = opts.insert or stats.median
logging.debug("Mate hangs must be <= {0}, --cutoff to override".\
format(maxcutoff))
rs = lambda x: x.accn[:-1]
fp = open(bedfile)
linksfile = bedfile.rsplit(".", 1)[0] + ".links"
fw = open(linksfile, "w")
def eject(args):
"""
%prog eject candidates.bed chr.fasta
Eject scaffolds from assembly, using the range identified by closest().
"""
p = OptionParser(eject.__doc__)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
candidates, chrfasta = args
sizesfile = Sizes(chrfasta).filename
cbedfile = complementBed(candidates, sizesfile)
cbed = Bed(cbedfile)
for b in cbed:
b.accn = b.seqid
b.score = 1000
b.strand = '+'
cbed.print_to_file()
Use --prefix to place the sequences with same prefix together. The final
product is an AGP file.
"""
from jcvi.algorithms.graph import nx
from jcvi.formats.agp import order_to_agp
p = OptionParser(scaffold.__doc__)
p.add_option("--prefix", default=False, action="store_true",
help="Keep IDs with same prefix together [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
ctgfasta, linksfile = args
sizes = Sizes(ctgfasta).mapping
logfile = "scaffold.log"
fwlog = open(logfile, "w")
pf = ctgfasta.rsplit(".", 1)[0]
agpfile = pf + ".agp"
fwagp = open(agpfile, "w")
g = nx.MultiGraph() # use this to get connected components
fp = open(linksfile)
for row in fp:
c = LinkLine(row)
distance = max(c.distance, 50)
g.add_edge(c.aseqid, c.bseqid,
orientation=c.orientation, distance=distance)
def make_bedgraph(bedfile, fastafile):
sizesfile = Sizes(fastafile).filename
pf = bedfile.rsplit(".", 1)[0]
bedfile = sort([bedfile])
bedgraph = pf + ".bedgraph"
if need_update(bedfile, bedgraph):
cmd = "genomeCoverageBed"
cmd += " -i {0} -g {1} -bga".format(bedfile, sizesfile)
sh(cmd, outfile=bedgraph)
return bedgraph
if len(args) != 1:
sys.exit(not p.print_help())
if qbed:
qsizes = qsizes or sizes([qbed])
qbed = Bed(qbed)
if sbed:
ssizes = ssizes or sizes([sbed])
sbed = Bed(sbed)
assert qsizes and ssizes, \
"You must specify at least one of --sizes of --bed"
qsizes = Sizes(qsizes, select=opts.qselect)
ssizes = Sizes(ssizes, select=opts.sselect)
blastfile, = args
image_name = op.splitext(blastfile)[0] + "." + opts.format
plt.rcParams["xtick.major.pad"] = 16
plt.rcParams["ytick.major.pad"] = 16
# Fix the width
xsize, ysize = qsizes.totalsize, ssizes.totalsize
# get highlight beds
qh, sh = opts.qh, opts.sh
qh = Bed(qh) if qh else None
sh = Bed(sh) if sh else None
highlights = (qh, sh) if qh or sh else None
help="Minimum read depth to report intervals [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
binfile, fastafile = args
fw = must_open(opts.output, "w")
cutoff = opts.cutoff
assert cutoff >= 0, "Need non-negative cutoff"
b = BinFile(binfile)
ar = b.array
fastasize, sizes, offsets = get_offsets(fastafile)
s = Sizes(fastafile)
for ctg, ctglen in s.iter_sizes():
offset = offsets[ctg]
subarray = ar[offset:offset + ctglen]
key = lambda x: x[1] >= cutoff
for tf, array_elements in groupby(enumerate(subarray), key=key):
array_elements = list(array_elements)
if not tf:
continue
# 0-based system => 1-based system
start = array_elements[0][0] + 1
end = array_elements[-1][0] + 1
mean_depth = sum([x[1] for x in array_elements]) / \
len(array_elements)
mean_depth = int(mean_depth)
help="Color the dots based on")
p.add_option("--nolayout", default=False, action="store_true",
help="Do not rearrange contigs")
p.set_align(pctid=0, hitlen=0)
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
deltafile, = args
reffasta, queryfasta = open(deltafile).readline().split()
color = opts.color
layout = not opts.nolayout
prefix = op.basename(deltafile).split(".")[0]
qsizes = Sizes(queryfasta).mapping
rsizes = Sizes(reffasta).mapping
refs = SetFile(opts.refids) if opts.refids else set(rsizes.keys())
refcov = opts.refcov
pctid = opts.pctid
hitlen = opts.hitlen
deltafile = filter([deltafile, "--pctid={0}".format(pctid),
"--hitlen={0}".format(hitlen)])
if opts.all:
for r in refs:
pdffile = plot_some_queries([r], qsizes, rsizes, deltafile, refcov,
prefix=prefix, color=color,
layout=layout)
if pdffile:
sh("mv {0} {1}.pdf".format(pdffile, r))
else:
goodsnpcounts[ctg] += 1
# Tabulate all possible combinations
intra = ref + "-" + intra
inter = alt + "-" + inter
combinations[(intra, inter)] += 1
if bedfw:
print("\t".join(str(x) for x in \
(ctg, pos - 1, pos, locus)), file=bedfw)
if bedfw:
logging.debug("SNP locations written to `{0}`.".format(opts.bed))
bedfw.close()
nsites = sum(len(x) for x in snps.values())
sizes = Sizes(fastafile)
bpsize = sizes.totalsize
snprate = lambda a: a * 1000. / bpsize
m = "Dataset `{0}` contains {1} contigs ({2} bp).\n".\
format(fastafile, len(sizes), thousands(bpsize))
m += "A total of {0} SNPs within {1} contigs ({2} bp).\n".\
format(nsites, len(snps),
thousands(sum(sizes.mapping[x] for x in snps.keys())))
m += "SNP rate: {0:.1f}/Kb, ".format(snprate(nsites))
m += "IntraSNPs: {0} ({1:.1f}/Kb), InterSNPs: {2} ({3:.1f}/Kb)".\
format(intraSNPs, snprate(intraSNPs), interSNPs, snprate(interSNPs))
print(m, file=sys.stderr)
print(tabulate(combinations), file=sys.stderr)
leg = "Legend: A - homozygous same, B - homozygous different, X - heterozygous"
print(leg, file=sys.stderr)
def write_unplaced_agp(agpfile, scaffolds, unplaced_agp):
agp = AGP(agpfile)
scaffolds_seen = set(x.component_id for x in agp)
sizes = Sizes(scaffolds).mapping
fwagp = must_open(unplaced_agp, "w")
for s in natsorted(sizes.keys()):
if s in scaffolds_seen:
continue
order_to_agp(s, [(s, "?")], sizes, fwagp)
logging.debug("Write unplaced AGP to `{0}`.".format(unplaced_agp))