How to use the pysam.index function in pysam

To help you get started, we’ve selected a few pysam examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jts / nanopolish / scripts / extract_reads_aligned_to_region.py View on Github external
l = line.split("\t")
			read_id = l.pop(0)
			if read_id in region_read_ids:
				fast5_file = l.pop(0)
				region_fast5_files[str(read_id)] = fast5_file.rstrip()

	# --------------------------------------------------------
	# PART 5:   Make a region BAM and BAI file
	# --------------------------------------------------------
	new_bam = "reads.bam"
	custom_print( "[ Writing to a new BAM file ] \t" + new_bam )
	region_reads = pysam.view("-b", o_bam, draft_ga_coords, "-o", new_bam, catch_stdout=False)
	
	new_bam_index = new_bam + ".bai"
	custom_print( "[ Writing to a new BAI file ] \t" + new_bam_index )
	pysam.index(new_bam, new_bam_index)

	# --------------------------------------------------------
	# PART 6: 	With user input ref coords, extract all 
	#		aligned	reads within these coordinates 
	#		and make new FASTA file
	# --------------------------------------------------------
	# detect type of sequences file then handle accordingly
	file_type = detect_fa_filetype(o_fa)
	new_fa = "reads.fasta"
	custom_print( "[ Writing to a new fasta file ]\t" +  new_fa )
	with open (new_fa, "w") as fout:
		if ".gz" in file_type:
			with gzip.open(o_fa, "rt") as fin:
				if "fasta.gz" in file_type:
					for title, seq in SimpleFastaParser(handle):
						name = title.split(None, 1)[0]
github rsemeraro / PyPore / lib / alg_routines_unix.py View on Github external
stdout=subprocess.PIPE)
            PairDict = sam_parser(ngmlrline, ngmlr_ext_dir)
        else:
            sam_nglmr_file = os.path.join(ngmlr_ext_dir, (prefix + '.sam'))
            log.debug(
                '[Alignment][ngmlral] - ngmlr -t %s -r %s -q %s -o %s -x ont' % (th, ref, fast_Q_file, sam_ngmlr_file))
            ngmlrline = subprocess.Popen(
                ['ngmlr', '-t', str(th), '-r', str(ref), '-q', str(fast_Q_file), '-o', str(sam_ngmlr_file), '-x ont'],
                stdout=subprocess.PIPE).wait()
            outputfilebam = os.path.join(ngmlr_ext_dir, (prefix + '.tmp.bam'))
            log.debug('[Alignment][ngmlral] - samtools view -Sb -@ %s %s -o %s' % (th, sam_nglmr_file, outputfilebam))
            pysam.view("-Sb", "-@%s" % str(th), sam_nglmr_file, "-o%s" % outputfilebam, catch_stdout=False)
            os.remove(sam_nglmr_file)
            pysam.sort(outputfilebam, "-o%s" % bam_ngmlr_file, catch_stdout=False)
            log.debug('[Alignment][ngmlral] - samtools index %s -@%s' % (bam_ngmlr_file, str(th)))
            pysam.index(bam_ngmlr_file, "-@%s" % str(th), catch_stdout=False)
            os.remove(outputfilebam)
    else:
        log.warning('[Alignment][ngmlral] - file %s already exists!' % bam_ngmlr_file)
    try:
        shutil.move(ngmlr_ext_dir, os.path.join(work_dir, out_dir))
    except shutil.Error:
        log.error("Unable to move %s" % ngmlr_ext_dir)
github ga4gh / ga4gh-server / ga4gh / repo_manager.py View on Github external
self._assertPathEmpty(destPath, inRepo=True)
        self._moveFile(filePath, destPath, moveMode)

        # move the index file if it exists, otherwise do indexing
        indexPath = os.path.join(
            os.path.split(filePath)[0],
            readGroupSetName + self.bamIndexExtension)
        indexMessage = ""
        if os.path.exists(indexPath):
            dstDir = os.path.split(destPath)[0]
            self._moveFile(
                indexPath,
                os.path.join(dstDir, os.path.basename(indexPath)),
                moveMode)
        else:
            pysam.index(destPath.encode('utf-8'))
            indexMessage = " (and indexed)"

        # finish
        self._repoEmit("ReadGroupSet '{}' added to dataset '{}'{}".format(
            fileName, datasetName, indexMessage))
github CGATOxford / cgat / CGATPipelines / pipeline_cufflinks_optimization.py View on Github external
@transform(reduceBamToChr19, suffix(".bam"), ".bam.bai")
def indexBam(infile, outfile):
    '''
    index the reduced bam file
    '''
    pysam.index(infile)
github nanoporetech / medaka / medaka / smolecule.py View on Github external
"""
    mode = 'wb' if bam else 'w'
    with pysam.AlignmentFile(fname, mode, header=header) as fh:
        for ref_id, subreads in enumerate(alignments):
            for aln in sorted(subreads, key=lambda x: x.rstart):
                a = pysam.AlignedSegment()
                a.reference_id = ref_id
                a.query_name = aln.qname
                a.query_sequence = aln.seq
                a.reference_start = aln.rstart
                a.cigarstring = aln.cigar
                a.flag = aln.flag
                a.mapping_quality = 60
                fh.write(a)
    if mode == 'wb':
        pysam.index(fname)
github galaxyproject / galaxy / tools / rgenetics / bams2mx.py View on Github external
sampName = sampName.replace(')','') # for R
        if d.get(sampName,None):
            print >> sys.stdout, '##ERROR: Duplicate input %s - ignored!!' % sampName
            continue
        d[sampName] = {} # new data
        outreads = 0 # count of reads outside each bed region for library size normalization 
        ignoredoutreads = 0
        outchrom = None
        outstart = None
        outend = None
        notmapped = 0
        # pysam expects to find infile.bai
        bam_index = "%s.bai" % f
        if (not os.path.isfile(bam_index)):
            print >> sys.stdout,'indexing ',f
            pysam.index(f)
        samfile = pysam.Samfile( f, "rb" )
        nbam = 0
        for c in chroms:
            try:
                nbam += samfile.count(c,1,999999999) # for non bed region count
            except:
                pass
        print 'bam %s has %d total reads' % (f,nbam)
        ignored = 0 # this is per sample over all regions
        ignoredoutreads = 0 # ditto but for total reads outside regions fudgtimate
        for bedrec in dat:
            hits = 0 # count by contig
            chrom = bedrec[0].strip()
            start = int(bedrec[1]) # new contig start becomes old inter-region end
            end = int(bedrec[2])
            regionID = bedrec[3]
github 10XGenomics / cellranger / tenkit / lib / python / tenkit / bam.py View on Github external
def sort_and_index(file_name, sorted_prefix=None):
    """ Sorts and indexes the bam file given by file_name.
    """
    if sorted_prefix is None:
        sorted_prefix = file_name.replace('.bam', '') + '_sorted'

    sorted_name = sorted_prefix + '.bam'
    log_subprocess.check_call(['samtools','sort', '-o', sorted_name, file_name])
    pysam.index(sorted_name)
github martijnvermaat / bio-playground / bam-coverage / bam_coverage.py View on Github external
def indexed_bam(bam_file):
    if not os.path.exists(bam_file + '.bai'):
        pysam.index(bam_file)
    bam = pysam.Samfile(bam_file, 'rb')
    yield bam
    bam.close()
github CGATOxford / CGATPipelines / CGATPipelines / PipelineIntervals.py View on Github external
input_sam = pysam.Samfile(input_file, "rb")
    input_nreads = 0
    for read in input_sam:
        input_nreads += 1

    if input_nreads > sample_nreads:
        E.info("INPUT bam has %s reads, SAMPLE bam has %s reads"
               % (input_nreads, sample_nreads))
        E.info("INPUT being downsampled to match SAMPLE")
        input_outfile = normalize(input_file,
                                  input_nreads,
                                  input_outfile,
                                  sample_nreads)
        shutil.copyfile(sample_file, sample_outfile)
        pysam.index(sample_outfile)

        return sample_outfile, input_outfile

    elif sample_nreads > input_nreads:
        E.info("SAMPLE bam has %s reads, INPUT bam has %s reads"
               % (sample_nreads, input_nreads))
        E.info("SAMPLE being downsampled to match INPUT")
        sample_outfile = normalize(sample_file,
                                   sample_nreads,
                                   sample_outfile,
                                   input_nreads)
        shutil.copyfile(input_file, input_outfile)
        pysam.index(input_outfile)

        return sample_outfile, input_outfile