Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Print out a report for length of gaps and components.
"""
from jcvi.utils.table import tabulate
p = OptionParser(stats.__doc__)
p.add_option("--warn", default=False, action="store_true",
help="Warnings on small component spans [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(p.print_help())
agpfile, = args
agp = AGP(agpfile)
gap_lengths = []
component_lengths = []
for a in agp:
span = a.object_span
if a.is_gap:
label = a.gap_type
gap_lengths.append((span, label))
else:
label = "{0}:{1}-{2}".format(a.component_id, a.component_beg, \
a.component_end)
component_lengths.append((span, label))
if opts.warn and span < 50:
logging.error("component span too small ({0}):\n{1}".\
format(span, a))
table = dict()
p.add_option("--header", default=False, action="store_true",
help="Produce an AGP header [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
merge = opts.merge
agpfile, = args
if merge:
merged_agpfile = agpfile.replace(".agp", ".merged.agp")
fw = open(merged_agpfile, "w")
agp = AGP(agpfile)
sizes = []
data = [] # store merged AGPLine's
priorities = ("centromere", "telomere", "scaffold", "contig", \
"clone", "fragment")
for is_gap, alines in groupby(agp, key=lambda x: (x.object, x.is_gap)):
alines = list(alines)
is_gap = is_gap[1]
if is_gap:
gap_size = sum(x.gap_length for x in alines)
gap_types = set(x.gap_type for x in alines)
for gtype in ("centromere", "telomere"):
if gtype in gap_types:
gap_size = gtype
sizes.append(gap_size)
necessary mostly due to manual edits (insert/delete) that disrupts
the target coordinates.
"""
p = OptionParser(reindex.__doc__)
p.add_option("--nogaps", default=False, action="store_true",
help="Remove all gap lines [default: %default]")
p.add_option("--inplace", default=False, action="store_true",
help="Replace input file [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(p.print_help())
agpfile, = args
inplace = opts.inplace
agp = AGP(agpfile, validate=False)
pf = agpfile.rsplit(".", 1)[0]
newagpfile = pf + ".reindexed.agp"
fw = open(newagpfile, "w")
agp.transfer_header(fw)
for chr, chr_agp in groupby(agp, lambda x: x.object):
chr_agp = list(chr_agp)
object_beg = 1
for i, b in enumerate(chr_agp):
b.object_beg = object_beg
b.part_number = i + 1
if opts.nogaps and b.is_gap:
continue
if b.is_gap:
b.object_end = object_beg + b.gap_length - 1
mx, my = zip(*xy)
rho = spearmanr(mx, my)
rhos[mlg] = rho
flip = rho < 0
g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
extra = -3 * tip if x < .5 else 3 * tip
ha = "right" if x < .5 else "left"
mapname = mlg.split("-")[0]
tlg = shorten(mlg.replace("_", ".")) # Latex does not like underscore char
label = "{0} (w={1})".format(tlg, weights[mapname])
ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
ha=ha, va="center", rotation=90)
marker_pos.update(g.marker_pos)
agp = AGP(agpfile)
agp = [x for x in agp if x.object == seqid]
chrsize = max(x.object_end for x in agp)
# Pseudomolecules in the center
r = ystart - ystop
ratio = r / chrsize
f = lambda x: (ystart - ratio * x)
patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)
label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
ax1.text(.5, ystart + tip, label, ha="center")
scatter_data = defaultdict(list)
# Connecting lines
for b in s.markers:
"""
from jcvi.graphics.base import plt, savefig, normalize_axes, panel_labels, set2
p = OptionParser(estimategaps.__doc__)
opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)
if len(args) != 3:
sys.exit(not p.print_help())
pf, seqid, mlg = args
bedfile = pf + ".lifted.bed"
agpfile = pf + ".agp"
function = lambda x: x.cm
cc = Map(bedfile, scaffold_info=True, function=function)
agp = AGP(agpfile)
g = GapEstimator(cc, agp, seqid, mlg, function=function)
pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
spl, spld = g.spl, g.spld
g.compute_all_gaps(verbose=False)
fig = plt.figure(1, (iopts.w, iopts.h))
root = fig.add_axes([0, 0, 1, 1])
# Panel A
xstart, ystart = .15, .55
w, h = .7, .4
t = np.linspace(0, chrsize, 1000)
ax = fig.add_axes([xstart, ystart, w, h])
mx, my = zip(*g.scatter_data)
rho = spearmanr(mx, my)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
agpfile, contigs = args
outdir = opts.outdir
if not op.exists(outdir):
mkdir(outdir)
cmd = "faSplit byname {0} {1}/".format(contigs, outdir)
sh(cmd)
cutoff = Cutoff(opts.pctid, opts.hitlen, opts.hang)
logging.debug(str(cutoff))
agp = AGP(agpfile)
blastfile = agpfile.replace(".agp", ".blast")
if not op.exists(blastfile):
populate_blastfile(blastfile, agp, outdir, opts)
assert op.exists(blastfile)
logging.debug("File `{0}` found. Start loading.".format(blastfile))
blast = BlastSlow(blastfile).to_dict()
annealedagp = "annealed.agp"
annealedfasta = "annealed.fasta"
newagp = deepcopy(agp)
clrstore = {}
for a, b, qreverse in agp.iter_paired_components():
aid = a.component_id
bid = b.component_id
AC225490.9 chr6
Can optionally output scaffold gaps.
"""
p = OptionParser(tpf.__doc__)
p.add_option("--noversion", default=False, action="store_true",
help="Remove trailing accession versions [default: %default]")
p.add_option("--gaps", default=False, action="store_true",
help="Include gaps in the output [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(not p.print_help())
agpfile, = args
agp = AGP(agpfile)
for a in agp:
object = a.object
if a.is_gap:
if opts.gaps and a.isCloneGap:
print("\t".join((a.gap_type, object, "na")))
continue
component_id = a.component_id
orientation = a.orientation
if opts.noversion:
component_id = component_id.rsplit(".", 1)[0]
print("\t".join((component_id, object, orientation)))
p.add_option("--sep", default=".", help="Separator for splits")
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(p.print_help())
agpfile, bedfile = args
gaptype = opts.gaptype
splitobject = opts.splitobject
splitcomponent = opts.splitcomponent
sep = opts.sep
assert not (splitobject and splitcomponent), \
"Options --splitobject and --splitcomponent conflict"
agp = AGP(agpfile)
bed = Bed(bedfile)
simple_agp = agp.order
# agp lines to replace original ones, keyed by the component
agp_fixes = defaultdict(list)
newagpfile = agpfile.replace(".agp", ".masked.agp")
fw = open(newagpfile, "w")
if splitcomponent:
componentindex = defaultdict(int)
for component, intervals in bed.sub_beds():
i, a = simple_agp[component]
object = a.object
orientation = a.orientation
aagpfile, bagpfile = args
# First AGP provides the mapping
store = {}
agp = AGP(aagpfile)
for a in agp:
if a.is_gap:
continue
# Ignore '?' in the mapping
if a.sign == 0:
a.sign = 1
store[(a.object, a.object_beg, a.object_end)] = \
(a.component_id, a.component_beg, a.component_end, a.sign)
# Second AGP forms the backbone
agp = AGP(bagpfile)
fw = must_open(opts.outfile, "w")
print("\n".join(agp.header), file=fw)
for a in agp:
if a.is_gap:
print(a, file=fw)
continue
component_id, component_beg, component_end, sign = \
store[(a.component_id, a.component_beg, a.component_end)]
orientation = {1: '+', -1: '-', 0: '?'}.get(sign * a.sign)
atoms = (a.object, a.object_beg, a.object_end, a.part_number, a.component_type,
component_id, component_beg, component_end, orientation)
a = AGPLine("\t".join(str(x) for x in atoms))
print(a, file=fw)
total, l50, n50 = s.summary
r = {}
maps = []
fw = must_open(opts.outfile, "w")
print("*** Summary for each individual map ***", file=fw)
for mapname in mapnames:
markers = [x for x in cc if x.mapname == mapname]
ms = MapSummary(markers, l50, s)
r["Linkage Groups", mapname] = ms.num_lgs
ms.export_table(r, mapname, total)
maps.append(ms)
print(tabulate(r, sep=sep, align=align), file=fw)
r = {}
agp = AGP(chr_agp)
print("*** Summary for consensus map ***", file=fw)
consensus_scaffolds = set(x.component_id for x in agp if not x.is_gap)
oriented_scaffolds = set(x.component_id for x in agp \
if (not x.is_gap) and x.orientation != '?')
unplaced_scaffolds = set(s.mapping.keys()) - consensus_scaffolds
for mapname, sc in (("Anchored", consensus_scaffolds),
("Oriented", oriented_scaffolds),
("Unplaced", unplaced_scaffolds)):
markers = [x for x in cc if x.seqid in sc]
ms = MapSummary(markers, l50, s, scaffolds=sc)
ms.export_table(r, mapname, total)
print(tabulate(r, sep=sep, align=align), file=fw)