How to use bamnostic - 10 common examples

To help you get started, we’ve selected a few bamnostic examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github betteridiot / bamnostic / tests / test_build.py View on Github external
def test_header():
    with bs.AlignmentFile(bs.example_bam, 'rb') as bam:
        expected = {0: ('chr1', 1575), 1: ('chr2', 1584)}
        observed = bam.header()
        assert observed == expected
github betteridiot / bamnostic / tests / test_build.py View on Github external
def test_check_index():
    with bs.AlignmentFile(bs.example_bam) as bam:
        with pytest.warns(UserWarning):
            bam.check_index('not_a_file.bai')
github betteridiot / bamnostic / tests / test_build.py View on Github external
def test_first_read():
    with bs.AlignmentFile(bs.example_bam, 'rb') as bam:
        first_read = next(bam)
        assert first_read.read_name == 'EAS56_57:6:190:289:82'
github betteridiot / bamnostic / tests / test_build.py View on Github external
def test_get_index():
    with pytest.warns(UserWarning):
        bam_no_bai = bs.AlignmentFile(bs.example_bam, index_filename='not_a_file.bai')
github colomemaria / epiScanpy / episcanpy / count_matrix / _atac_mtx.py View on Github external
output_file.write('\t')
        output_file.write('\n')
    # close file to write   
    output_file.close()

    # start going through the bam files
    for name_file in list_bam_files[0:]:

        ## important variables for output
        index_feat = {key: 0 for key in chromosomes}    
        val_feat = {key: [0 for x in range(len(loaded_feat[key]))] for key in chromosomes}
    
        ## PART 1 read the bam file
        keep_lines = []
        #samfile = bs.AlignmentFile(path+output_file_name, mode="rb", check_sq=False)
        samfile = bs.AlignmentFile(path+name_file, mode="rb", check_sq=False)
        #for read in samfile.fetch(until_eof=True):
        for read in samfile:
            line = str(read).split('\t')
            if line[2][3:] in chromosomes:
                keep_lines.append(line[2:4])
            ### print -- output
        print(name_file, len(keep_lines), 'mapped reads')
        samfile.close()
        
        ## PART2 reads that fall into 
        for element in keep_lines:
            ## 2 things per line:
            chrom = element[0][3:]
            read_pos = int(element[1])
            max_value_index = len(loaded_feat[chrom])
            ## I want to check if the read map to a feature in the same chrom
github betteridiot / bamnostic / bamnostic / bai.py View on Github external
that share the same bin. A `Chunk` object in the context of
        this function is a `namedtuple` that contains the virtual offsets
        for the beginning and end of each of these chunks.

        Note: a special case of a chunk is in any Bin labeled as 37450.
        These bins always contain 2 chunks that provide the statistics
        of the number of reads that are unmapped to that reference.

        Args:
            n_chunks (int): number of chunks to be unpacked from stream

        Returns:
            chunks (list): a list of Chunk objects with the attributes of
                            chunks[i] are .voffset_beg and voffset_end
        """
        chunks = [Chunk(self._io) for chunk in range(n_chunks)]
        return chunks
github betteridiot / bamnostic / bamnostic / csi.py View on Github external
that share the same bin. A `Chunk` object in the context of
        this function is a `namedtuple` that contains the virtual offsets
        for the beginning and end of each of these chunks.

        Note: a special case of a chunk is in any Bin labeled as 37450.
        These bins always contain 2 chunks that provide the statistics
        of the number of reads that are unmapped to that reference.

        Args:
            n_chunks (int): number of chunks to be unpacked from stream

        Returns:
            chunks (list): a list of Chunk objects with the attributes of
                            chunks[i] are .voffset_beg and voffset_end
        """
        chunks = [bai.Chunk(self._io) for chunk in range(n_chunks)]
        return chunks
github betteridiot / bamnostic / bamnostic / bam.py View on Github external
Note:
            Slow, when compared to the C-API. Not meant for high-throughput analysis.

        Does not advance current iterator position.

        Args:
            AlignedSegment (:py:class:`bamnostic.AlignedSegment`): a bamnostic AlignedSegment read with a mate

        Returns:
            (:py:class:`bamnostic.AlignedSegment`): if read has a valid mate, else None

        Raises:
            ValueError: if AlignedSegment is unpaired

        """
        with bamnostic.AlignmentFile(self._handle.name, index_filename=self._index_path) as mate_head:

            # Don't look if there isn't a pair
            if not AlignedSegment.is_paired:
                raise ValueError('Read is unpaired')

            # Based on standard convention
            read_name_base = read_name_pat.search(AlignedSegment.read_name).groupdict()['read_name']
            rnext = AlignedSegment.next_reference_id

            # Look for available mate information
            if rnext < 0:
                return None  # Information is unavailable
            pnext = AlignedSegment.next_reference_start
            if pnext < 0:
                return None  # no information available on read
github betteridiot / bamnostic / bamnostic / bam.py View on Github external
mutliple_iterators (bool): Whether to use current file object or create a new one (default: False).

        Returns:
            head_reads (:py:obj:`list` of :py:obj:`AlignedSegment`): list of **n** reads from the front of the BAM file

        Example:
            >>> bam = bamnostic.AlignmentFile(bamnostic.example_bam, 'rb')
            >>> bam.head(n=5, multiple_iterators = False)[0] # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
            EAS56_57:6:190:289:82	...	MF:C:192

            >>> bam.head(n = 5)[1] # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
            EAS56_57:6:190:289:82	...	UQ:C:0

        """
        if multiple_iterators:
            head_iter = bamnostic.AlignmentFile(self._handle.name, index_filename=self._index_path)
        else:
            curr_pos = self.tell()
            # BAMheader uses byte specific positions (and not BGZF virtual offsets)
#             self._handle.seek(self._header._BAMheader_end)
            self._load_block(self._header._BAMheader_end)
            head_iter = self

        head_reads = [next(head_iter) for read in range(n)]

        if multiple_iterators:
            # close the independent file object
            head_iter.close()
        else:
            # otherwise, just go back to old position
            self.seek(curr_pos)
            assert self.tell() == curr_pos
github betteridiot / bamnostic / bamnostic / bai.py View on Github external
Returns:
            bins (None | dict): None if just indexing the index file or a dictionary
                                of `bin_id: chunks` pairs
        Raises:
            AssertionError (Exception): if bin 37450 does not contain 2 chunks exactly
        """
        bins = None if idx else {}

        for b in range(n_bins):
            bin_id, n_chunks = unpack_bid_nchunk(self._io.read(8))

            if idx:
                if bin_id == self._UNMAP_BIN:
                    assert n_chunks == 2, 'Bin 3740 is supposed to have 2 chunks. This has {}'.format(n_chunks)
                    unmapped = Unmapped(*unpack_unmapped(self._io.read(32)))
                    self.unmapped[ref_id] = unmapped
                else:
                    if not n_chunks == 0:
                        self._io.seek(16 * n_chunks, 1)  # 16 = struct.calcsize('<2Q')
            else:
                chunks = self.get_chunks(n_chunks)
                bins[bin_id] = chunks
        else:
            return bins