import pytest

import bamnostic as bs


def test_header():
    with bs.AlignmentFile(bs.example_bam, 'rb') as bam:
        expected = {0: ('chr1', 1575), 1: ('chr2', 1584)}
        observed = bam.header()
        assert observed == expected


def test_check_index():
    with bs.AlignmentFile(bs.example_bam) as bam:
        with pytest.warns(UserWarning):
            bam.check_index('not_a_file.bai')


def test_first_read():
    with bs.AlignmentFile(bs.example_bam, 'rb') as bam:
        first_read = next(bam)
        assert first_read.read_name == 'EAS56_57:6:190:289:82'


def test_get_index():
    with pytest.warns(UserWarning):
        bam_no_bai = bs.AlignmentFile(bs.example_bam, index_filename='not_a_file.bai')
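# --- hedged usage sketch, not part of the test module above ---
# Region queries against the same bundled example BAM, assuming fetch() accepts a
# contig name plus start/stop coordinates and that 'chr1' covers the range below.
import bamnostic as bs

with bs.AlignmentFile(bs.example_bam, 'rb') as bam:
    for read in bam.fetch('chr1', 1, 100):
        print(read.read_name)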
output_file.write('\t')
output_file.write('\n')

# close file to write
output_file.close()

# start going through the bam files
for name_file in list_bam_files[0:]:
    ## important variables for output
    index_feat = {key: 0 for key in chromosomes}
    val_feat = {key: [0 for x in range(len(loaded_feat[key]))] for key in chromosomes}

    ## PART 1: read the bam file
    keep_lines = []
    # samfile = bs.AlignmentFile(path + output_file_name, mode="rb", check_sq=False)
    samfile = bs.AlignmentFile(path + name_file, mode="rb", check_sq=False)
    # for read in samfile.fetch(until_eof=True):
    for read in samfile:
        line = str(read).split('\t')
        if line[2][3:] in chromosomes:  # strip the 'chr' prefix from the reference name
            keep_lines.append(line[2:4])
    ### print -- output
    print(name_file, len(keep_lines), 'mapped reads')
    samfile.close()

    ## PART 2: tally reads that fall into features
    for element in keep_lines:
        ## 2 things per line: chromosome and read position
        chrom = element[0][3:]
        read_pos = int(element[1])
        max_value_index = len(loaded_feat[chrom])
        ## check whether the read maps to a feature on the same chromosome
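        ## --- hedged sketch; the original snippet is cut off at this point ---
        ## One way the lookup could work, assuming loaded_feat[chrom] holds feature
        ## start positions and `feat_window` (hypothetical) is the feature width:
        for i in range(max_value_index):
            feat_start = loaded_feat[chrom][i]
            if feat_start <= read_pos < feat_start + feat_window:
                val_feat[chrom][i] += 1   # tally this read against feature i
                index_feat[chrom] = i     # remember the last feature hit on this chrom
                break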
        that share the same bin. A `Chunk` object in the context of
        this function is a `namedtuple` that contains the virtual offsets
        for the beginning and end of each of these chunks.

        Note: a special case of a chunk occurs in any bin labeled 37450.
        These bins always contain exactly 2 chunks, which carry the statistics
        on the number of reads that are unmapped against that reference.

        Args:
            n_chunks (int): number of chunks to be unpacked from the stream

        Returns:
            chunks (list): a list of Chunk objects; each chunks[i] exposes the
            attributes .voffset_beg and .voffset_end

        """
        chunks = [Chunk(self._io) for chunk in range(n_chunks)]
        return chunks
        that share the same bin. A `Chunk` object in the context of
        this function is a `namedtuple` that contains the virtual offsets
        for the beginning and end of each of these chunks.

        Note: a special case of a chunk occurs in any bin labeled 37450.
        These bins always contain exactly 2 chunks, which carry the statistics
        on the number of reads that are unmapped against that reference.

        Args:
            n_chunks (int): number of chunks to be unpacked from the stream

        Returns:
            chunks (list): a list of Chunk objects; each chunks[i] exposes the
            attributes .voffset_beg and .voffset_end

        """
        chunks = [bai.Chunk(self._io) for chunk in range(n_chunks)]
        return chunks
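# --- hedged sketch, not the library's own class ---
# The docstring above describes a Chunk as a pair of BGZF virtual offsets. Assuming
# each chunk is stored on disk as two little-endian uint64 values (16 bytes, matching
# the struct.calcsize('<2Q') comment in the index-parsing snippet further below), a
# minimal stand-alone reader could look like this:
import struct
from collections import namedtuple

ChunkSketch = namedtuple('ChunkSketch', ('voffset_beg', 'voffset_end'))


def read_chunk(stream):
    """Read one (voffset_beg, voffset_end) pair from a binary stream."""
    return ChunkSketch(*struct.unpack('<2Q', stream.read(16)))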
        Note:
            Slow when compared to the C-API. Not meant for high-throughput analysis.
            Does not advance the current iterator position.

        Args:
            AlignedSegment (:py:class:`bamnostic.AlignedSegment`): a bamnostic AlignedSegment read with a mate

        Returns:
            (:py:class:`bamnostic.AlignedSegment`): the mate read if one can be found, else None

        Raises:
            ValueError: if AlignedSegment is unpaired

        """
        with bamnostic.AlignmentFile(self._handle.name, index_filename=self._index_path) as mate_head:
            # Don't look if there isn't a pair
            if not AlignedSegment.is_paired:
                raise ValueError('Read is unpaired')

            # Based on the standard read-naming convention
            read_name_base = read_name_pat.search(AlignedSegment.read_name).groupdict()['read_name']

            rnext = AlignedSegment.next_reference_id
            # Look for available mate information
            if rnext < 0:
                return None  # information is unavailable

            pnext = AlignedSegment.next_reference_start
            if pnext < 0:
                return None  # no information available on the read
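# --- hedged usage sketch, assuming the method above is exposed as AlignmentFile.mate() ---
# Pull the first read from the bundled example BAM and look up its mate via the index.
import bamnostic

with bamnostic.AlignmentFile(bamnostic.example_bam, 'rb') as bam:
    read = next(bam)
    try:
        mate = bam.mate(read)  # None if no mate information is stored in the record
    except ValueError:
        mate = None            # the read was unpaired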
            multiple_iterators (bool): Whether to use the current file object or create a new one (default: False).

        Returns:
            head_reads (:py:obj:`list` of :py:obj:`AlignedSegment`): list of **n** reads from the front of the BAM file

        Example:
            >>> bam = bamnostic.AlignmentFile(bamnostic.example_bam, 'rb')
            >>> bam.head(n=5, multiple_iterators=False)[0]  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
            EAS56_57:6:190:289:82 ... MF:C:192
            >>> bam.head(n=5)[1]  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
            EAS56_57:6:190:289:82 ... UQ:C:0

        """
        if multiple_iterators:
            head_iter = bamnostic.AlignmentFile(self._handle.name, index_filename=self._index_path)
        else:
            curr_pos = self.tell()
            # BAMheader uses byte-specific positions (and not BGZF virtual offsets)
            # self._handle.seek(self._header._BAMheader_end)
            self._load_block(self._header._BAMheader_end)
            head_iter = self

        head_reads = [next(head_iter) for read in range(n)]

        if multiple_iterators:
            # close the independent file object
            head_iter.close()
        else:
            # otherwise, just go back to the old position
            self.seek(curr_pos)
            assert self.tell() == curr_pos
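# --- hedged usage sketch ---
# head() peeks at the first reads without disturbing iteration, so the read returned
# by next(bam) afterwards should still be the very first record in the file.
import bamnostic

bam = bamnostic.AlignmentFile(bamnostic.example_bam, 'rb')
first_two = bam.head(n=2)
assert next(bam).read_name == first_two[0].read_name
bam.close()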
        Returns:
            bins (None | dict): None if only indexing the index file, otherwise a
            dictionary of `bin_id: chunks` pairs

        Raises:
            AssertionError (Exception): if bin 37450 does not contain exactly 2 chunks

        """
        bins = None if idx else {}
        for b in range(n_bins):
            bin_id, n_chunks = unpack_bid_nchunk(self._io.read(8))
            if idx:
                if bin_id == self._UNMAP_BIN:
                    assert n_chunks == 2, 'Bin 37450 is supposed to have 2 chunks. This has {}'.format(n_chunks)
                    unmapped = Unmapped(*unpack_unmapped(self._io.read(32)))
                    self.unmapped[ref_id] = unmapped
                else:
                    if not n_chunks == 0:
                        self._io.seek(16 * n_chunks, 1)  # 16 = struct.calcsize('<2Q')
            else:
                chunks = self.get_chunks(n_chunks)
                bins[bin_id] = chunks
        else:
            return bins
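# --- hedged sketch, not the library's own code ---
# The 32 bytes consumed for bin 37450 above hold its two pseudo-chunks. In the BAI
# format these amount to four little-endian uint64 values: the virtual-offset interval
# of the placed-unmapped reads plus the mapped/unmapped read counts for the reference.
import struct
from collections import namedtuple

UnmappedStats = namedtuple('UnmappedStats',
                           ('voffset_beg', 'voffset_end', 'n_mapped', 'n_unmapped'))


def read_unmapped_pseudo_bin(stream):
    """Unpack the two pseudo-chunks of bin 37450 from a binary stream."""
    return UnmappedStats(*struct.unpack('<4Q', stream.read(32)))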