How to use the kipoiseq.transforms.functional.translate function in kipoiseq

To help you get started, we’ve selected a few kipoiseq examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kipoi / kipoiseq / tests / test_5_protein_dl.py View on Github external
#assert gps.transcripts.isin(dfp.index).all()

transcript_id = 'ENST00000485079'
div3_error = 0
seq_mismatch_err = 0
err_transcripts = []
for transcript_id in tqdm(gps.transcripts):
    # make sure all ids can be found in the proteome
    dna_seq = gps.get_seq(transcript_id)
    # dna_seq = dna_seq[:(len(dna_seq) // 3) * 3]
    if len(dna_seq) % 3 != 0:
        div3_error += 1
        print("len(dna_seq) % 3 != 0: {}".format(transcript_id))
        err_transcripts.append({"transcript_id": transcript_id, "div3_err": True})
        continue
    prot_seq = translate(dna_seq)
    if dfp.loc[transcript_id].seq != prot_seq:
        seq_mismatch_err += 1
        print("seq.mismatch: {}".format(transcript_id))
        n_mismatch = 0
        for i in range(len(prot_seq)):
            a = dfp.loc[transcript_id].seq[i]
            b = prot_seq[i]
            if a != b:
                n_mismatch += 1
                print("{} {} {}/{}".format(a,b,i,len(prot_seq)))
        err_transcripts.append({"transcript_id": transcript_id, "div3_err": False,
                                "n-seq-mismatch": n_mismatch})
        # print("prot:", dfp.loc[transcript_id].seq)
        # print("seq: ", prot_seq)
err_transcripts = pd.DataFrame(err_transcripts)
# err_cds.to_csv("data/protein/err_cds.csv")
github kipoi / kipoiseq / tests / test_5_protein_dl.py View on Github external
vcf_file = 'tests/data/singleSeq_vcf_ENST000000381176.vcf.gz'
    protein_file = 'tests/data/demo_proteins.pep.all.fa'
    txt_file = 'tests/data/dna_seq_ENST00000381176.txt'

    'Interval.strand = "-"'
    transcript_id = 'ENST00000381176'
    
    f = open(txt_file)
    control_seq_list = []
    for seq in f:
        control_seq_list.append(seq.replace('\n', ''))
        
    vs = SingleSeqAminoAcidVCFSeqExtractor(fasta_file, vcf_file, gtf_file)
    test_dna_seq = vs.extract(transcript_id)

    ref_dna_seq = translate(cut_seq(rc_dna("".join(control_seq_list))))
    assert test_dna_seq == ref_dna_seq, "Seq mismatch for Interval.strand = -"
github kipoi / kipoiseq / tests / test_5_protein_dl.py View on Github external
def extract(self, transcript_id, sample_id=None):
        intervals, strand = self.genome_cds_fetcher.get_cds_exons(transcript_id)
        intervals = self.strand_default(intervals)
        variant_interval_queryable = self.multi_sample_VCF.query_variants(intervals, sample_id=sample_id)

        for seq in self.extract_sinlge(variant_interval_queryable, intervals=intervals):

            seq = self.genome_cds_fetcher.prepare_seq(seq, strand)

            yield translate(seq)
github kipoi / kipoiseq / tests / test_5_protein_dl.py View on Github external
vcf_file = 'tests/data/singleVar_vcf_ENST000000381176.vcf.gz'
    protein_file = 'tests/data/demo_proteins.pep.all.fa'
    txt_file = 'tests/data/dna_seq_ENST00000319363.txt'


    'Interval.strand = "+"'
    transcript_id = 'ENST00000319363'
    f = open(txt_file)
    control_seq_list = []
    for seq in f:
        control_seq_list.append(seq.replace('\n', ''))
        
    vs = SingleSeqAminoAcidVCFSeqExtractor(fasta_file, vcf_file, gtf_file)
    test_dna_seq = vs.extract(transcript_id)

    assert test_dna_seq == translate(cut_seq("".join(control_seq_list))), "Seq mismatch for Interval.strand = +"
github kipoi / kipoiseq / tests / test_5_protein_dl.py View on Github external
def extract(self, transcript_id, sample_id=None):
        intervals, strand = self.genome_cds_fetcher.get_cds_exons(transcript_id)
        intervals = self.strand_default(intervals)

        variant_interval_queryable = self.multi_sample_VCF.query_variants(intervals, sample_id=sample_id)
        
        seq = self.extract_multiple(variant_interval_queryable)

        seq = self.genome_cds_fetcher.prepare_seq(seq, strand)
        
        return translate(seq)
github kipoi / kipoiseq / kipoiseq / extractors / protein.py View on Github external
def get_protein_seq(self, transcript_id: str):
        """
        Extract amino acid sequence for given transcript_id
        :param transcript_id: 
        :return: amino acid sequence
        """
        return translate(self.get_seq(transcript_id), hg38=True)
github kipoi / kipoiseq / kipoiseq / extractors / protein.py View on Github external
def _prepare_seq(seqs: List[str], strand: str, tag: str):
        """
        Prepare the dna sequence and translate it into amino acid sequence
        :param seqs: current dna sequence
        :param strand: dna strand, where the gene is located
        :param tag: tags, which contain information about ambiguous start/end
        :return: amino acid sequence
        """
        return translate(TranscriptSeqExtractor._prepare_seq(seqs, strand, tag), True)