Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
self.samfile=pysam.Samfile( "ex1.bam","rb" )
def bed_to_interval(orig_bed, bam_file):
"""Add header and format BED bait and target files for Picard if necessary.
"""
with open(orig_bed) as in_handle:
line = in_handle.readline()
if line.startswith("@"):
yield orig_bed
else:
with pysam.Samfile(bam_file, "rb") as bam_handle:
header = bam_handle.text
with tmpfile(dir=os.path.dirname(orig_bed), prefix="picardbed") as tmp_bed:
with open(tmp_bed, "w") as out_handle:
out_handle.write(header)
with open(orig_bed) as in_handle:
for i, line in enumerate(in_handle):
parts = line.rstrip().split("\t")
if len(parts) == 4:
chrom, start, end, name = parts
strand = "+"
elif len(parts) >= 3:
chrom, start, end = parts[:3]
strand = "+"
name = "r%s" % i
out = [chrom, start, end, strand, name]
out_handle.write("\t".join(out) + "\n")
def main():
"""Main Program Procedure"""
options = parse_options(sys.argv)
contigs = dict()
time_total = time.time()
### iterate over alignment file(s)
for file in options.alignment.split(','):
options.is_bam = False
### open file stream
if file == '-':
infile = sys.stdin
elif (len(file) > 3 and file[-3:] == 'bam') or options.bam_force:
infile = pysam.Samfile(file, 'rb')
options.is_bam = True
else:
infile = open(file, 'r')
if options.verbose:
if options.alignment == '-':
print >> sys.stderr, "Reading alignment from stdin\n"
else:
print >> sys.stderr, "Reading alignment from %s\n" % options.alignment
### get contigs from alignment data
if len(contigs) == 0:
(contigs, lastline) = read_header(options, infile)
### TODO handle lastline (line after header) for SAM input
### check if we have a version on disk
def _rdc_chunk(bamfile, regions, min_mapq):
if isinstance(bamfile, str):
bamfile = pysam.Samfile(bamfile, 'rb')
for chrom, start, end, gene in regions.coords(["gene"]):
yield region_depth_count(bamfile, chrom, start, end, gene, min_mapq)
def parse_algs(bamFn, mapq, verbose):
"""Parse alignements."""
cov = rname = None
# parse algs
bam = pysam.Samfile(bamFn)
for alg in bam: #bam.fetch(reference='1'):
# skip low mapq, unmapped and secondary
if alg.mapq < mapq or alg.rname<0 or alg.is_secondary:
continue
if alg.rname != rname:
# report previous
if cov:
yield bam.references[rname], cov
# reset rname and chr2cov
rname = alg.rname
# stores +/- read counts
cov = [np.zeros(bam.lengths[rname]), np.zeros(bam.lengths[rname])]
# get strand
strand = 0
if alg.is_reverse:
strand = 1
def bam_filter(infile, outfile, criteria, failedfile=None, verbose=False):
if verbose:
sys.stderr.write('Input file : %s\n' % infile)
sys.stderr.write('Output file : %s\n' % outfile)
if failedfile:
sys.stderr.write('Failed reads: %s\n' % failedfile)
sys.stderr.write('Criteria:\n')
for criterion in criteria:
sys.stderr.write(' %s\n' % criterion)
sys.stderr.write('\n')
bamfile = pysam.Samfile(infile, "rb")
outfile = pysam.Samfile(outfile, "wb", template=bamfile)
if failedfile:
failed_out = open(failedfile, 'w')
else:
failed_out = None
passed = 0
failed = 0
def _callback(read):
return "%s | %s kept,%s failed" % ('%s:%s' % (bamfile.getrname(read.tid), read.pos) if read.tid > -1 else 'unk', passed, failed)
for read in bam_iter(bamfile):
p = True
RG['LB'] = 'bamsurgeon'
RG['CN'] = 'BS'
if 'PU' in RG and RG['PU'] not in PURG:
PU = str(uuid4())
PURG[RG['PU']] = PU
RG['PU'] = PU
if 'ID' in RG and RG['ID'] not in IDRG:
ID = str(uuid4())
IDRG[RG['ID']] = ID
RG['ID'] = ID
if 'PG' in header:
del header['PG']
header['PG'] = [{'ID': 'bamsurgeon', 'PN': 'bamsurgeon'}]
outsam = pysam.Samfile(outsamfn, 'wh', header=header)
outsam.close()
paired = {} # track read pairs
# counters for debug
n = 0 # number of reads
p = 0 # numbar of paired reads
u = 0 # number of unpaired reads
w = 0 # reads written
m = 0 # mates found
fixed_strand = 0
fixed_rg_pair = 0
fixed_matepos = 0
fixed_tlen = 0
fixed_unmap = 0
def replace(origbamfile, mutbamfile, outbamfile, seed=None):
''' open .bam file and call replacereads
'''
origbam = pysam.Samfile(origbamfile, 'rb')
mutbam = pysam.Samfile(mutbamfile, 'rb')
outbam = pysam.Samfile(outbamfile, 'wb', template=origbam)
rr.replaceReads(origbam, mutbam, outbam, keepqual=True, seed=seed)
origbam.close()
mutbam.close()
outbam.close()
#!/usr/bin/env python
import pysam
import sys
from re import sub
from random import random
from uuid import uuid4
if len(sys.argv) == 2:
assert sys.argv[1].endswith('.bam')
inbamfn = sys.argv[1]
outbamfn = sub('.bam$', '.renamereads.bam', inbamfn)
inbam = pysam.Samfile(inbamfn, 'rb')
outbam = pysam.Samfile(outbamfn, 'wb', template=inbam)
paired = {}
n = 0
p = 0
u = 0
w = 0
m = 0
for read in inbam.fetch(until_eof=True):
n += 1
if read.is_paired:
p += 1
if read.qname in paired:
uuid = paired[read.qname]
del paired[read.qname]
baseReadsFile = os.path.basename(mappedReadsFile)
baseReadsFile = re.sub(r'\.bam$|\.sam$', '', baseReadsFile)
# Open handlers for output files
handle_valid = open(outputDir + '/' + baseReadsFile + '.validPairs', 'w')
if allOutput:
handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w')
handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs','w')
handle_filt = open(outputDir + '/' + baseReadsFile + '.FiltPairs','w')
# Read the SAM/BAM file
if verbose:
print "## Opening SAM/BAM file '", mappedReadsFile, "'..."
samfile = pysam.Samfile(mappedReadsFile, "rb")
# Reads are 0-based too (for both SAM and BAM format)
# Loop on all reads
for read in samfile.fetch(until_eof=True):
reads_counter += 1
cur_handler = None
interactionType = None
htag = ""
# First mate
if read.is_read1:
r1 = read
if not r1.is_unmapped:
r1_chrom = samfile.getrname(r1.tid)
else:
r1_chrom = None