How to use the pysam.Samfile function in pysam

To help you get started, we’ve selected a few pysam examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pysam-developers / pysam / save / pysam_test2.6.py View on Github external
def setUp(self):
        self.samfile=pysam.Samfile( "ex1.bam","rb" )
github bcbio / bcbio-nextgen / bcbio / broad / metrics.py View on Github external
def bed_to_interval(orig_bed, bam_file):
    """Add header and format BED bait and target files for Picard if necessary.
    """
    with open(orig_bed) as in_handle:
        line = in_handle.readline()
    if line.startswith("@"):
        yield orig_bed
    else:
        with pysam.Samfile(bam_file, "rb") as bam_handle:
            header = bam_handle.text
        with tmpfile(dir=os.path.dirname(orig_bed), prefix="picardbed") as tmp_bed:
            with open(tmp_bed, "w") as out_handle:
                out_handle.write(header)
                with open(orig_bed) as in_handle:
                    for i, line in enumerate(in_handle):
                        parts = line.rstrip().split("\t")
                        if len(parts) == 4:
                            chrom, start, end, name = parts
                            strand = "+"
                        elif len(parts) >= 3:
                            chrom, start, end = parts[:3]
                            strand = "+"
                            name = "r%s" % i
                        out = [chrom, start, end, strand, name]
                        out_handle.write("\t".join(out) + "\n")
github ratschlab / RiboDiff / tools / pipeline / count_reads / count_expression.pysam.py View on Github external
def main():
    """Main Program Procedure"""
    
    options = parse_options(sys.argv)
    contigs = dict()

    time_total = time.time()

    ### iterate over alignment file(s)
    for file in options.alignment.split(','):
        options.is_bam = False
        ### open file stream
        if file == '-':
            infile = sys.stdin
        elif (len(file) > 3 and file[-3:] == 'bam') or options.bam_force:
            infile = pysam.Samfile(file, 'rb')
            options.is_bam = True
        else:
            infile = open(file, 'r')

        if options.verbose:
            if options.alignment == '-':
                print >> sys.stderr, "Reading alignment from stdin\n"
            else:
                print >> sys.stderr, "Reading alignment from %s\n" % options.alignment

        ### get contigs from alignment data
        if len(contigs) == 0:
            (contigs, lastline) = read_header(options, infile)
            ### TODO handle lastline (line after header) for SAM input

            ### check if we have a version on disk
github etal / cnvkit / cnvlib / coverage.py View on Github external
def _rdc_chunk(bamfile, regions, min_mapq):
    if isinstance(bamfile, str):
        bamfile = pysam.Samfile(bamfile, 'rb')
    for chrom, start, end, gene in regions.coords(["gene"]):
        yield region_depth_count(bamfile, chrom, start, end, gene, min_mapq)
github lpryszcz / bin / bam2enhancers.py View on Github external
def parse_algs(bamFn, mapq, verbose):
    """Parse alignements."""
    cov = rname = None
    # parse algs
    bam = pysam.Samfile(bamFn)
    for alg in bam: #bam.fetch(reference='1'):
        # skip low mapq, unmapped and secondary
        if alg.mapq < mapq or alg.rname<0 or alg.is_secondary:
            continue
        if alg.rname != rname:
            # report previous
            if cov:
                yield bam.references[rname], cov
            # reset rname and chr2cov
            rname = alg.rname
            # stores +/- read counts
            cov = [np.zeros(bam.lengths[rname]), np.zeros(bam.lengths[rname])]
        # get strand
        strand = 0
        if alg.is_reverse:
            strand = 1
github ngsutils / ngsutils / ngsutils / bam / filter.py View on Github external
def bam_filter(infile, outfile, criteria, failedfile=None, verbose=False):
    if verbose:
        sys.stderr.write('Input file  : %s\n' % infile)
        sys.stderr.write('Output file : %s\n' % outfile)
        if failedfile:
            sys.stderr.write('Failed reads: %s\n' % failedfile)
        sys.stderr.write('Criteria:\n')
        for criterion in criteria:
            sys.stderr.write('    %s\n' % criterion)

        sys.stderr.write('\n')

    bamfile = pysam.Samfile(infile, "rb")
    outfile = pysam.Samfile(outfile, "wb", template=bamfile)

    if failedfile:
        failed_out = open(failedfile, 'w')
    else:
        failed_out = None

    passed = 0
    failed = 0

    def _callback(read):
        return "%s | %s kept,%s failed" % ('%s:%s' % (bamfile.getrname(read.tid), read.pos) if read.tid > -1 else 'unk', passed, failed)

    for read in bam_iter(bamfile):
        p = True
github adamewing / bamsurgeon / bamsurgeon / postprocess.py View on Github external
RG['LB'] = 'bamsurgeon'
            RG['CN'] = 'BS'
            if 'PU' in RG and RG['PU'] not in PURG:
                PU = str(uuid4())
                PURG[RG['PU']] = PU
                RG['PU'] = PU
            if 'ID' in RG and RG['ID'] not in IDRG:
                ID = str(uuid4())
                IDRG[RG['ID']] = ID
                RG['ID'] = ID 

    if 'PG' in header:
        del header['PG']
        header['PG'] = [{'ID': 'bamsurgeon', 'PN': 'bamsurgeon'}]

    outsam = pysam.Samfile(outsamfn, 'wh', header=header)
    outsam.close()

    paired = {} # track read pairs

    # counters for debug
    n = 0 # number of reads
    p = 0 # numbar of paired reads
    u = 0 # number of unpaired reads
    w = 0 # reads written
    m = 0 # mates found

    fixed_strand  = 0
    fixed_rg_pair = 0
    fixed_matepos = 0
    fixed_tlen    = 0
    fixed_unmap   = 0
github adamewing / bamsurgeon / bin / addindel.py View on Github external
def replace(origbamfile, mutbamfile, outbamfile, seed=None):
    ''' open .bam file and call replacereads
    '''
    origbam = pysam.Samfile(origbamfile, 'rb')
    mutbam  = pysam.Samfile(mutbamfile, 'rb')
    outbam  = pysam.Samfile(outbamfile, 'wb', template=origbam)

    rr.replaceReads(origbam, mutbam, outbam, keepqual=True, seed=seed)

    origbam.close()
    mutbam.close()
    outbam.close()
github adamewing / bamsurgeon / scripts / rename_reads.py View on Github external
#!/usr/bin/env python

import pysam
import sys
from re import sub
from random import random
from uuid import uuid4

if len(sys.argv) == 2:
    assert sys.argv[1].endswith('.bam')
    inbamfn = sys.argv[1]
    outbamfn = sub('.bam$', '.renamereads.bam', inbamfn)

    inbam = pysam.Samfile(inbamfn, 'rb')
    outbam = pysam.Samfile(outbamfn, 'wb', template=inbam)

    paired = {}

    n = 0
    p = 0
    u = 0
    w = 0
    m = 0

    for read in inbam.fetch(until_eof=True):
        n += 1
        if read.is_paired:
            p += 1
            if read.qname in paired:
                uuid = paired[read.qname]
                del paired[read.qname]
github nservant / HiC-Pro / scripts / mapped_2hic_dnase.py View on Github external
baseReadsFile = os.path.basename(mappedReadsFile)
    baseReadsFile = re.sub(r'\.bam$|\.sam$', '', baseReadsFile)

    # Open handlers for output files
    handle_valid = open(outputDir + '/' + baseReadsFile + '.validPairs', 'w')

    if allOutput:
        handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w')
        handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs','w')
        handle_filt = open(outputDir + '/' + baseReadsFile + '.FiltPairs','w')

    # Read the SAM/BAM file
    if verbose:
        print "## Opening SAM/BAM file '", mappedReadsFile, "'..."
    samfile = pysam.Samfile(mappedReadsFile, "rb")

    # Reads are 0-based too (for both SAM and BAM format)
    # Loop on all reads
    for read in samfile.fetch(until_eof=True):
        reads_counter += 1
        cur_handler = None
        interactionType = None
        htag = ""

        # First mate
        if read.is_read1:
            r1 = read
            if not r1.is_unmapped:
                r1_chrom = samfile.getrname(r1.tid)
            else:
                r1_chrom = None