Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
seen.add(gapid)
gi, gb = gorder[gapid]
gpbed.append(gb)
gappositions[(gb.seqid, gb.start, gb.end)] = gapid
gap_to_scf[gapid].append((unplaced, strand))
gpbedfile = "candidate.gaps.bed"
gpbed.print_to_file(gpbedfile, sorted=True)
agpfile = agp([chrfasta])
maskedagpfile = mask([agpfile, gpbedfile])
maskedbedfile = maskedagpfile.rsplit(".", 1)[0] + ".bed"
bed([maskedagpfile, "--outfile={0}".format(maskedbedfile)])
mbed = Bed(maskedbedfile)
finalbed = Bed()
for b in mbed:
sid = b.seqid
key = (sid, b.start, b.end)
if key not in gappositions:
finalbed.add("{0}\n".format(b))
continue
gapid = gappositions[key]
scfs = gap_to_scf[gapid]
# For scaffolds placed in the same gap, sort according to positions
scfs.sort(key=lambda x: corder[x[0]][1].start + corder[x[0]][1].end)
for scf, strand in scfs:
size = sizes[scf]
finalbed.add("\t".join(str(x) for x in \
(scf, 0, size, sid, 1000, strand)))
sr = Range("0", mins, maxs, maxs - mins, id)
qranges.append(qr)
sranges.append(sr)
qpads = list(get_segments(qranges, qextra))
spads = list(get_segments(sranges, sextra))
suffix = ".pad.bed"
qpf = opts.qbed.split(".")[0]
spf = opts.sbed.split(".")[0]
qpadfile = qpf + suffix
spadfile = spf + suffix
qnpads, qpadnames = write_PAD_bed(qpadfile, qpf, qpads, qbed)
snpads, spadnames = write_PAD_bed(spadfile, spf, spads, sbed)
qpadbed, spadbed = Bed(qpadfile), Bed(spadfile)
logmp = make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames)
m, n = logmp.shape
matrixfile = ".".join((qpf, spf, "logmp.txt"))
fw = open(matrixfile, "w")
header = ["o"] + spadnames
print("\t".join(header), file=fw)
for i in xrange(m):
row = [qpadnames[i]] + ["{0:.1f}".format(x) for x in logmp[i, :]]
print("\t".join(row), file=fw)
fw.close()
# Run CLUSTER 3.0 (Pearson correlation, average linkage)
cmd = op.join(opts.path, "cluster")
the plots are one contig/scaffold per plot.
"""
from jcvi.graphics.base import set_image_options
from jcvi.utils.iter import grouper
p = OptionParser(plot.__doc__)
p.add_option("--cutoff", type="int", default=1000000,
help="Plot scaffolds with size larger than [default: %default]")
opts, args, iopts = set_image_options(p, args, figsize="14x8", dpi=150)
if len(args) < 4 or len(args) % 3 != 1:
sys.exit(not p.print_help())
scafsizes = Sizes(args[0])
trios = list(grouper(3, args[1:]))
trios = [(a, Sizes(b), Bed(c)) for a, b, c in trios]
for scaffoldID, scafsize in scafsizes.iter_sizes():
if scafsize < opts.cutoff:
continue
logging.debug("Loading {0} (size={1})".format(scaffoldID,
thousands(scafsize)))
tmpname = scaffoldID + ".sizes"
tmp = open(tmpname, "w")
tmp.write("{0}\t{1}".format(scaffoldID, scafsize))
tmp.close()
tmpsizes = Sizes(tmpname)
tmpsizes.close(clean=True)
imagename = ".".join((scaffoldID, opts.format))
p.add_option("--maxsize", default=300000, type="int",
help="Maximum size of patchers to be replaced [default: %default]")
p.add_option("--prefix", help="Prefix of the new object [default: %default]")
p.add_option("--strict", default=False, action="store_true",
help="Only update if replacement has no gaps [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 4:
sys.exit(not p.print_help())
pbed, pfasta, bbfasta, altfasta = args
maxsize = opts.maxsize # Max DNA size to replace gap
rclip = opts.rclip
blastfile = blast([altfasta, pfasta,"--wordsize=100", "--pctid=99"])
order = Bed(pbed).order
beforebed, afterbed = blast_to_twobeds(blastfile, order, rclip=rclip,
maxsize=maxsize)
beforefasta = fastaFromBed(beforebed, bbfasta, name=True, stranded=True)
afterfasta = fastaFromBed(afterbed, altfasta, name=True, stranded=True)
# Exclude the replacements that contain more Ns than before
ah = SeqIO.parse(beforefasta, "fasta")
bh = SeqIO.parse(afterfasta, "fasta")
count_Ns = lambda x: x.seq.count('n') + x.seq.count('N')
exclude = set()
for arec, brec in zip(ah, bh):
an = count_Ns(arec)
bn = count_Ns(brec)
if opts.strict:
if bn == 0:
qsizes = Sizes(qsizes, select=opts.qselect)
ssizes = Sizes(ssizes, select=opts.sselect)
blastfile, = args
image_name = op.splitext(blastfile)[0] + "." + opts.format
plt.rcParams["xtick.major.pad"] = 16
plt.rcParams["ytick.major.pad"] = 16
# Fix the width
xsize, ysize = qsizes.totalsize, ssizes.totalsize
# get highlight beds
qh, sh = opts.qh, opts.sh
qh = Bed(qh) if qh else None
sh = Bed(sh) if sh else None
highlights = (qh, sh) if qh or sh else None
ratio = ysize * 1. / xsize if proportional else 1
width = iopts.w
height = iopts.h * ratio
fig = plt.figure(1, (width, height))
root = fig.add_axes([0, 0, 1, 1]) # the whole canvas
ax = fig.add_axes([.1, .1, .8, .8]) # the dot plot
blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed,
style=opts.dotstyle, proportional=proportional, sampleN=opts.nmax,
baseticks=True, stripNames=opts.stripNames, highlights=highlights)
# add genome names
to_ax_label = lambda fname: op.basename(fname).split(".")[0]
def intersectBed_wao(abedfile, bbedfile, minOverlap=0):
abed = Bed(abedfile)
bbed = Bed(bbedfile)
print("`{0}` has {1} features.".format(abedfile, len(abed)), file=sys.stderr)
print("`{0}` has {1} features.".format(bbedfile, len(bbed)), file=sys.stderr)
cmd = "intersectBed -wao -a {0} -b {1}".format(abedfile, bbedfile)
acols = abed[0].nargs
bcols = bbed[0].nargs
fp = popen(cmd)
for row in fp:
atoms = row.split()
aline = "\t".join(atoms[:acols])
bline = "\t".join(atoms[acols:acols + bcols])
c = int(atoms[-1])
if c < minOverlap:
continue
a = BedLine(aline)
try:
help="Only plot maximum of N dots [default: %default]")
opts, args, iopts = p.set_image_options(figsize="8x8", style="dark", dpi=150)
qsizes, ssizes = opts.qsizes, opts.ssizes
qbed, sbed = opts.qbed, opts.sbed
proportional = opts.proportional
if len(args) != 1:
sys.exit(not p.print_help())
if qbed:
qsizes = qsizes or sizes([qbed])
qbed = Bed(qbed)
if sbed:
ssizes = ssizes or sizes([sbed])
sbed = Bed(sbed)
assert qsizes and ssizes, \
"You must specify at least one of --sizes of --bed"
qsizes = Sizes(qsizes, select=opts.qselect)
ssizes = Sizes(ssizes, select=opts.sselect)
blastfile, = args
image_name = op.splitext(blastfile)[0] + "." + opts.format
plt.rcParams["xtick.major.pad"] = 16
plt.rcParams["ytick.major.pad"] = 16
# Fix the width
xsize, ysize = qsizes.totalsize, ssizes.totalsize
p = OptionParser(random.__doc__)
opts, args = p.parse_args(args)
if len(args) != 2:
sys.exit(not p.print_help())
bedfile, N = args
assert is_number(N)
b = Bed(bedfile)
NN = flexible_cast(N)
if NN < 1:
NN = int(round(NN * len(b)))
beds = sample(b, NN)
new_bed = Bed()
new_bed.extend(beds)
outfile = bedfile.rsplit(".", 1)[0] + ".{0}.bed".format(N)
new_bed.print_to_file(outfile)
logging.debug("Write {0} features to `{1}`".format(NN, outfile))
nogapsfw.close()
largestgapsfw.close()
beds = [largestgapsbed]
toclean = [nogapsbed, largestgapsbed]
if opts.closest:
closestgapsbed = pf + ".closestgaps.bed"
cmd = "closestBed -a {0} -b {1} -d".format(nogapsbed, gapsbed)
sh(cmd, outfile=closestgapsbed)
beds += [closestgapsbed]
toclean += [closestgapsbed]
else:
pointbed = pf + ".point.bed"
pbed = Bed()
bed = Bed(nogapsbed)
for b in bed:
pos = (b.start + b.end) / 2
b.start, b.end = pos, pos
pbed.append(b)
pbed.print_to_file(pointbed)
beds += [pointbed]
toclean += [pointbed]
refinedbed = pf + ".refined.bed"
FileMerger(beds, outfile=refinedbed).merge()
# Clean-up
FileShredder(toclean)
return refinedbed
exclude.add(id)
logging.debug("Ignore {0} updates because of decreasing quality."\
.format(len(exclude)))
abed = Bed(beforebed, sorted=False)
bbed = Bed(afterbed, sorted=False)
abed = [x for x in abed if x.accn not in exclude]
bbed = [x for x in bbed if x.accn not in exclude]
abedfile = "before.filtered.bed"
bbedfile = "after.filtered.bed"
afbed = Bed()
afbed.extend(abed)
bfbed = Bed()
bfbed.extend(bbed)
afbed.print_to_file(abedfile)
bfbed.print_to_file(bbedfile)
shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix)