How to use the pysam.asTuple function in pysam

To help you get started, we’ve selected a few pysam examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PubuduSaneth / cnvScan / src / cnvScan_VarFilt.py View on Github external
continue

                    try:
                        row = OrderedDict(zip(header, line.split('\t')))
                    except:
                        continue

                    if self.genelist:
                        PIDD_GENE = []
                        Inheritance = []
                        Phenotype = []
                        try:
                            for genepanel_line in gene_list.fetch(row['#chr'],
                                                                  int(row['start']),
                                                                  int(row['end']),
                                                                  parser=pysam.asTuple()):
                                PIDD_GENE += [genepanel_line[3]]
                                Inheritance += [genepanel_line[4]]
                                Phenotype += [genepanel_line[5]]
                            row['PIDD_GENE'] = "|".join(PIDD_GENE) if PIDD_GENE else 'NA'
                            row['Inheritance'] = "|".join(Inheritance) if Inheritance else 'NA'
                            row['Phenotype'] ="|".join(Phenotype) if Phenotype else 'NA'
                        except ValueError:
                            pass

                    if self.filter_line(row):
                        fout.write('\t'.join(row.values()) + '\n')
github colomemaria / epiScanpy / episcanpy / count_matrix / _bld_atac_mtx.py View on Github external
tbx = pysam.TabixFile(tsv_file)

    # format annotations
    window_list = []

    if genome:
        for chrom in sorted(annotation.keys()):
            window_list += [["".join([genome, '_chr', chrom]), int(n[0]), int(n[1])] for n in annotation[chrom]]
    else:
        for chrom in sorted(annotation.keys()):
            window_list += [["".join(['chr', chrom]), int(n[0]), int(n[1])] for n in annotation[chrom]]

    print('building count matrix')
    mtx = lil_matrix((nb_barcodes, len(window_list)), dtype=np.float32)
    for i, tmp_feat in enumerate(tqdm(window_list)):
        for row in tbx.fetch(tmp_feat[0], tmp_feat[1], tmp_feat[2], parser=pysam.asTuple()):
            mtx[dict_barcodes[str(row).split('\t')[-2]], i] += 1

    print('building AnnData object')
    mtx = ad.AnnData(mtx.tocsr(),
                     obs=pd.DataFrame(index=barcodes),
                     var=pd.DataFrame(index=['_'.join([str(p) for p in n]) for n in window_list]))

    if csv_file:
        print('filtering barcodes')
        df = pd.read_csv(csv_file)
        if genome == 'mm10':
            df_filtered = df[(df.is_mm10_cell_barcode == 1)]
        elif genome == 'hg19':
            df_filtered = df[(df.is_hg19_cell_barcode == 1)]
        else:
            df_filtered = df[(df.is_cell_barcode == 1)]
github henryjuho / parus_indel / summary_analyses / wga2phy.py View on Github external
else:
                skip = False

        # if a header line
        if line.startswith('>'):

            # process prev sequence
            if sequence != '' and len(sequence) % 3 == 0 and start_stop_ok(sequence):

                align_dict = {}

                # extract positions from alignment
                for codon in [coords[i: i + 3] for i in range(0, len(coords), 3)]:
                    codon_dict = {}
                    for pos in codon:
                        align_pos = list(wga.fetch(chromo, pos - 1, pos, parser=pysam.asTuple()))

                        # get align data in form [('dmel', 'T'), ('dsim', '?'), ('dyak', 'T')]

                        try:
                            spp_bases = zip(align_pos[0][4].split(','), align_pos[0][7].split(','))
                            for spp in spp_bases:
                                if spp[0] not in codon_dict.keys():
                                    codon_dict[spp[0]] = ''
                                if len(align_pos) == 0:
                                    codon_dict[spp[0]] += 'N'
                                else:
                                    codon_dict[spp[0]] += spp[1][0]
                        except IndexError:
                            continue

                    # if entire codon is not in alignment skip
github broadinstitute / cms / cms / cms / util / old / vcf.py View on Github external
    def __init__(self, inFile, parser=pysam.asTuple()):
        # because of odd Cython weirdness, we don't actually want to call super.__init__ here..
        #super(TabixReader, self).__init__(inFile, parser=parser)
        self.parser = parser
    def __enter__(self):
github arq5x / gemini / gemini / annotations.py View on Github external
except IOError:
            raise IOError("Gemini cannot open this annotation file: %s. \n"
                          "Have you installed the annotation files?  If so, "
                          "have they been moved or deleted? Exiting...\n\n"
                          "For more details:\n\t"
                          "http://gemini.readthedocs.org/en/latest/content/"
                          "#installation.html\#installing-annotation-files\n"
                          % anno_files[anno])

# ## Standard access to Tabix indexed files


PARSERS = {"bed": pysam.asBed(),
           "vcf": pysam.asVCF(),
           "tuple": pysam.asTuple(),
           None: None}

def _get_hits(coords, annotation, parser_type, _parsers=PARSERS):
    """Retrieve BED information, recovering if BED annotation file does have a chromosome.
    """
    try:
        parser = _parsers[parser_type]
    except KeyError:
        raise ValueError("Unexpected parser type: %s" % parser)
    chrom, start, end = coords
    if isinstance(annotation, pysam.VariantFile):
        return annotation.fetch(chrom, start, end)
    elif isinstance(annotation, cyvcf2.VCF):
        return annotation("%s:%d-%d" % (chrom, start - 1, end))
    try:
        hit_iter = annotation.fetch(str(chrom), start, end, parser=parser)
github broadinstitute / viral-ngs / util / vcf.py View on Github external
    def __init__(self, inFile, parser=pysam.asTuple()):
        # inFile is passed in but is never used, and yet the TabixReader works. How?!
        # This inFile magic is because Tabixfile is a Cython object that uses def __cinit__ 
        # rather than def __init__; the former is called automatically exactly once for the 
        # base class prior to any use of __init__. Use of subsequent __init__ should obey 
        # normal inheritance rules (assuming inheritance from object and it's not an old-style class). 
        # So __cinit__ sets the input file, and then our __init__ (which doesn't override a base method) 
        # is called and sets the parser. 
        # This could all break in the future if pysam moves away from __cinit__, but doing so would
        # reduce performance and so it seems unlikely.
        # See:
        #   https://github.com/cython/cython/blob/master/docs/src/userguide/special_methods.rst#id19
        #   https://github.com/pysam-developers/pysam/blob/master/pysam/libctabix.pyx#L331
        #super(TabixReader, self).__init__(inFile, parser=parser)
        self.parser = parser
github churchill-lab / g2gtools / g2gtools / fasta_transform.py View on Github external
#LOG.debug("index of '>' is {}".format(new_sequence.getvalue().find('>')))

        start = mappings[0].from_start
        LOG.debug("Setting start to {} (mappings[0].from_start)".format(start))

        found = False
        new_sequence_len = 0

        LOG.debug('start={}'.format(start))
        LOG.debug('last_pos={}'.format(last_pos))


        LOG.debug("VCI Fetch {}".format(fasta_transform_params.vci_query))
        local_vci_file = vci.VCIFile(fasta_transform_params.vci_file)
        for line in local_vci_file.fetch(fasta_transform_params.vci_query, parser=pysam.asTuple()):
            aline = line

            if line[5] == '.':
                continue

            found = True
            LOG.debug('')
            LOG.debug("LINE: {}".format(line))
            #new_sequence_value = new_sequence.getvalue()

            #if len(new_sequence_value) > 50:
            #    LOG.debug('current={}...{}'.format(new_sequence_value[:25], new_sequence_value[-25:]))
            #else:
            #    LOG.debug('current={}'.format(new_sequence_value))

            # chromosome, position, shared_bases, deleted_bases, inserted_bases, fragment_size
github churchill-lab / g2gtools / g2gtools / vci_parse.py View on Github external
tree = IntervalTree()

    try:

        if line_no % 10000 == 0:
            print("CONTIG {} {}".format(contig, line_no))

        pos_from = 0
        pos_to = 0

        tabix_file = pysam.TabixFile(filename_vci)

        iterator = None

        try:
            iterator = tabix_file.fetch(contig, parser=pysam.asTuple())
        except:
            LOG.debug("Exception for {}".format(contig))

        if iterator is None:
            LOG.debug("No iterator")
            return ret

        for rec in iterator:
            if len(rec) != 6:
                raise exceptions.G2GError("Unexpected line in G2G file. Line #{0:,}: {1}".format(line_no, rec))

            if rec[2] == '.':
                continue

            """
            1 3000019 G . A 3000019