How to use the pysam.tabix_index function in pysam

To help you get started, we’ve selected a few pysam examples based on popular ways it is used in public projects.
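
The pattern is the same in all of the examples below: the data file must be sorted by chromosome and position, bgzf-compressed, and then indexed; once the .tbi index exists, the file can be queried efficiently by region. A minimal sketch of that workflow, assuming a position-sorted, tab-delimited BED file at regions.bed (a hypothetical path):

import pysam

# bgzf-compress the plain-text file, then build the .tbi index next to it
pysam.tabix_compress("regions.bed", "regions.bed.gz", force=True)
pysam.tabix_index("regions.bed.gz", preset="bed", force=True)

# query the indexed file by region; fetch() yields raw text lines
with pysam.TabixFile("regions.bed.gz") as tbx:
    for line in tbx.fetch("chr1", 1000, 2000):
        print(line)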


github svviz / svviz / src / svviz / tabix.py
import logging
import os

import pysam

def ensureIndexed(bedPath, preset="bed", trySorting=True):
    if not bedPath.endswith(".gz"):
        if not os.path.exists(bedPath + ".gz"):
            logging.info("bgzf compressing {}".format(bedPath))
            pysam.tabix_compress(bedPath, bedPath + ".gz")
            if not os.path.exists(bedPath + ".gz"):
                raise Exception("Failed to create compressed {preset} file for {file}; make sure the {preset} file is "
                    "sorted and the directory is writeable".format(preset=preset, file=bedPath))
        bedPath += ".gz"
    if not os.path.exists(bedPath + ".tbi"):
        logging.info("creating tabix index for {}".format(bedPath))
        pysam.tabix_index(bedPath, preset=preset)
        if not os.path.exists(bedPath + ".tbi"):
            raise Exception("Failed to create tabix index file for {file}; make sure the {preset} file is "
                "sorted and the directory is writeable".format(preset=preset, file=bedPath))

    # sanity-check the first record (Tabixfile is the older spelling of
    # pysam.TabixFile; AnnotationError is svviz's own exception class)
    line = next(pysam.Tabixfile(bedPath).fetch())
    if len(line.strip().split("\t")) < 6 and preset == "bed":
        raise AnnotationError("BED files need to have at least 6 (tab-delimited) fields (including "
            "chrom, start, end, name, score, strand; score is unused)")
    if len(line.strip().split("\t")) < 9 and preset == "gff":
        raise AnnotationError("GFF/GTF files need to have at least 9 tab-delimited fields")

    return bedPath
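
Called on a plain-text file, the helper above compresses and indexes it, then returns the path to the bgzipped file, for example (annotations.bed is a hypothetical path):

bedPath = ensureIndexed("annotations.bed", preset="bed")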
github ga4gh / ga4gh-server / scripts / prepare_compliance_data.py
def addVariantSet(
        self, variantFileName, dataset, referenceSet,
        ontology, biosamples):
    inputVcf = os.path.join(self.inputDirectory, variantFileName)
    outputVcf = os.path.join(self.outputDirectory, variantFileName)
    shutil.copy(inputVcf, outputVcf)
    pysam.tabix_index(outputVcf, preset="vcf")
    variantSet = variants.HtslibVariantSet(
        dataset, variantFileName.split('_')[1])
    variantSet.setReferenceSet(referenceSet)
    variantSet.populateFromFile(
        [os.path.abspath(outputVcf + ".gz")],
        [os.path.abspath(outputVcf + ".gz.tbi")])
    variantSet.checkConsistency()
    for callSet in variantSet.getCallSets():
        for biosample in biosamples:
            if biosample.getLocalId() == callSet.getLocalId():
                callSet.setBiosampleId(biosample.getId())
    self.repo.insertVariantSet(variantSet)

    for annotationSet in variantSet.getVariantAnnotationSets():
        annotationSet.setOntology(ontology)
        self.repo.insertVariantAnnotationSet(annotationSet)
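
Note that tabix_index is given the uncompressed outputVcf here: when the filename does not already end in .gz, pysam bgzips the file in place, which is why the later calls reference outputVcf + ".gz" and outputVcf + ".gz.tbi". A minimal sketch of that step in isolation, assuming a sorted VCF at sample.vcf (a hypothetical path):

import pysam

# the plain file is replaced by sample.vcf.gz, and sample.vcf.gz.tbi is
# created; tabix_index returns the name of the compressed file
indexed = pysam.tabix_index("sample.vcf", preset="vcf")
print(indexed)  # sample.vcf.gz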
github RahmanTeam / CAVA / bin / dbSNPDB.py
def indexFile(options):
    sys.stdout.write('Compressing output file ... ')
    sys.stdout.flush()
    pysam.tabix_compress(options.output, options.output + '.gz', force=True)
    sys.stdout.write('OK\n')
    sys.stdout.write('Indexing output file ... ')
    sys.stdout.flush()
    pysam.tabix_index(options.output + '.gz', seq_col=1, start_col=2, end_col=2, meta_char='#', force=True)
    sys.stdout.write('OK\n')
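
Here no preset fits, so the coordinate columns are given explicitly. pysam's seq_col, start_col and end_col are 0-based column indices (unlike the 1-based -s/-b/-e options of the tabix command line), so seq_col=1 above refers to the second column of the file. A minimal sketch for a generic tab-delimited file with the chromosome in the first column and a single position in the second (sites.txt is a hypothetical path):

import pysam

pysam.tabix_compress("sites.txt", "sites.txt.gz", force=True)
# 0-based columns: chromosome in column 0, position in column 1;
# lines starting with '#' are treated as header/comment lines
pysam.tabix_index("sites.txt.gz", seq_col=0, start_col=1, end_col=1,
                  meta_char="#", force=True)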
github broadinstitute / viral-ngs / tools / snpeff.py
if not genomeToUse:
    raise Exception()

args = [
    '-treatAllAsProteinCoding', 'false', '-t', '-noLog', '-ud', '0', '-noStats', '-noShiftHgvs', genomeToUse,
    os.path.realpath(inVcf)
]

command_ps = self.execute('ann', args, JVMmemory=JVMmemory)
if command_ps.returncode == 0:
    with open(tmpVcf, 'wt') as outf:
        outf.write(command_ps.stdout.decode("utf-8"))

    if outVcf.endswith('.vcf.gz'):
        pysam.tabix_compress(tmpVcf, outVcf, force=True)
        pysam.tabix_index(outVcf, force=True, preset='vcf')
        os.unlink(tmpVcf)
else:
    raise subprocess.CalledProcessError(cmd=command_ps.args, returncode=command_ps.returncode, output=command_ps.stdout)
github 10XGenomics / cellranger / tenkit / lib / python / tenkit / tabix.py
def index_vcf(filename):
    pysam.tabix_index(filename, preset='vcf', force=True)
github bioinform / varsim / utils.py
def index_vcf_gz(vcf_gz):
    pysam.tabix_index(vcf_gz, force=True, preset='vcf')
github sanger-pathogens / ariba / ariba / samtools_variants.py
print(pysam.mpileup(
    '-t', 'INFO/AD,INFO/ADF,INFO/ADR',
    '-L', '99999999',
    '-A',
    '-f', self.ref_fa,
    '-u',
    '-v',
    self.bam,
), end='', file=f)

got = vcfcall_ariba.vcfcall_ariba(tmp_vcf, self.outprefix, self.min_var_read_depth, self.min_second_var_read_depth, self.max_allele_freq)
if got != 0:
    raise Error('Error parsing vcf file. Cannot continue')

pysam.tabix_compress(self.outprefix + '.read_depths', self.read_depths_file)
pysam.tabix_index(self.read_depths_file, seq_col=0, start_col=1, end_col=1)
os.unlink(self.outprefix + '.read_depths')
os.unlink(tmp_vcf)
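
Once the read-depths file is compressed and indexed with explicit columns, it can be queried by region just like a preset-indexed file. A minimal sketch, assuming the indexed file ended up at out.read_depths.gz and the reference has a contig named contig1 (both hypothetical names):

import pysam

with pysam.TabixFile("out.read_depths.gz") as tbx:
    for line in tbx.fetch("contig1", 0, 100):
        print(line.split("\t"))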
github bioinform / metasv / scripts / svtool_to_vcf.py
    else:
        vcf_writer.write_record(vcf_record)

if sort:
    if reference_contigs:
        contigs_order_dict = {contig.name: index for (index, contig) in enumerate(reference_contigs)}
        # Python 2 cmp-style sort: order records by contig rank, then position
        vcf_records.sort(
            cmp=lambda x, y: cmp((contigs_order_dict[x.CHROM], x.POS), (contigs_order_dict[y.CHROM], y.POS)))
    else:
        vcf_records.sort(cmp=lambda x, y: cmp((x.CHROM, x.POS), (y.CHROM, y.POS)))

    for vcf_record in vcf_records:
        vcf_writer.write_record(vcf_record)
vcf_writer.close()
if out_vcf and index:
    pysam.tabix_index(out_vcf, force=True, preset='vcf')
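
After indexing, the output can be read back by region. A minimal sketch, assuming out_vcf was a plain .vcf path that tabix_index bgzipped to calls.vcf.gz (a hypothetical name):

import pysam

vcf = pysam.VariantFile("calls.vcf.gz")
for rec in vcf.fetch("chr1", 0, 1000000):
    print(rec.chrom, rec.pos, rec.ref, rec.alts)
vcf.close()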