How to use kipoiseq - 10 common examples

To help you get started, we’ve selected a few kipoiseq examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kipoi / kipoiseq / tests / test_5_protein_dl.py View on Github external
#assert gps.transcripts.isin(dfp.index).all()

# Translate the sequence of every transcript in `gps` and compare it with the
# `seq` stored for the same id in `dfp`. One record is collected per failing
# transcript: either its DNA length is not a multiple of 3, or the translated
# protein differs from the stored one.

# NOTE: this value is rebound by the loop below; it only survives when
# `gps.transcripts` yields nothing.
transcript_id = 'ENST00000485079'
div3_error = 0
seq_mismatch_err = 0
err_transcripts = []
for transcript_id in tqdm(gps.transcripts):
    # make sure all ids can be found in the proteome
    dna_seq = gps.get_seq(transcript_id)
    # dna_seq = dna_seq[:(len(dna_seq) // 3) * 3]
    if len(dna_seq) % 3 != 0:
        # Incomplete codon at the end: record it and skip the translation.
        div3_error += 1
        print("len(dna_seq) % 3 != 0: {}".format(transcript_id))
        err_transcripts.append({"transcript_id": transcript_id, "div3_err": True})
        continue
    prot_seq = translate(dna_seq)
    # Look the reference protein up once instead of once per residue.
    ref_seq = dfp.loc[transcript_id].seq
    if ref_seq != prot_seq:
        seq_mismatch_err += 1
        print("seq.mismatch: {}".format(transcript_id))
        n_mismatch = 0
        # zip() stops at the shorter of the two sequences, so a reference
        # that is shorter than the translation no longer raises IndexError;
        # only the overlapping positions are compared residue by residue.
        for i, (a, b) in enumerate(zip(ref_seq, prot_seq)):
            if a != b:
                n_mismatch += 1
                print("{} {} {}/{}".format(a, b, i, len(prot_seq)))
        err_transcripts.append({"transcript_id": transcript_id, "div3_err": False,
                                "n-seq-mismatch": n_mismatch})
        # print("prot:", dfp.loc[transcript_id].seq)
        # print("seq: ", prot_seq)
# One row per failing transcript.
err_transcripts = pd.DataFrame(err_transcripts)
# err_cds.to_csv("data/protein/err_cds.csv")
github kipoi / kipoiseq / tests / extractors / test_vcf_seq_extractor.py View on Github external
interval = Interval('chr1', 4, 14)
    seq = variant_seq_extractor.extract(interval, variants, anchor=4)
    assert len(seq) == interval.end - interval.start
    assert seq == 'GAACGTAACG'

    interval = Interval('chr1', 2, 5)
    seq = variant_seq_extractor.extract(interval, variants, anchor=3)
    assert len(seq) == interval.end - interval.start
    assert seq == 'GCG'

    interval = Interval('chr1', 24, 34)
    seq = variant_seq_extractor.extract(interval, variants, anchor=27)
    assert len(seq) == interval.end - interval.start
    assert seq == 'TGATAACGTA'

    interval = Interval('chr1', 25, 35)
    seq = variant_seq_extractor.extract(interval, variants, anchor=34)
    assert len(seq) == interval.end - interval.start
    assert seq == 'TGATAACGTA'

    interval = Interval('chr1', 34, 44)
    seq = variant_seq_extractor.extract(interval, variants, anchor=37)
    assert len(seq) == interval.end - interval.start
    assert seq == 'AACGTAACGT'

    interval = Interval('chr1', 34, 44)
    seq = variant_seq_extractor.extract(interval, variants, anchor=100)
    assert len(seq) == interval.end - interval.start
    assert seq == 'AACGTAACGT'

    interval = Interval('chr1', 5, 11, strand='+')
    seq = variant_seq_extractor.extract(
github kipoi / kipoiseq / tests / extractors / test_vcf.py View on Github external
import pytest
from conftest import vcf_file, sample_5kb_fasta_file
from kipoiseq.dataclasses import Variant, Interval
from kipoiseq.extractors.vcf_query import NumberVariantQuery
from kipoiseq.extractors.vcf import MultiSampleVCF

# Local alias for the sample FASTA imported from conftest.
fasta_file = sample_5kb_fasta_file

# Three chr1 intervals, built from their (start, end) pairs.
intervals = [
    Interval('chr1', start, end)
    for start, end in ((3, 10), (4, 30), (19, 30))
]


@pytest.fixture
def multi_sample_vcf():
    """Provide a ``MultiSampleVCF`` opened on the conftest ``vcf_file``."""
    vcf = MultiSampleVCF(vcf_file)
    return vcf


def test_MultiSampleVCF__next__(multi_sample_vcf):
    """Check the fields of the first variant returned by ``next()``."""
    first = next(multi_sample_vcf)
    # Checked in the same order as before; stops at the first mismatch.
    expected_fields = (
        ('chrom', 'chr1'),
        ('pos', 4),
        ('ref', 'T'),
        ('alt', 'C'),
    )
    for field, expected in expected_fields:
        assert getattr(first, field) == expected
github kipoi / kipoiseq / tests / extractors / test_vcf_seq_extractor.py View on Github external
def test_single_seq_vcf_seq_extract(single_seq_vcf_seq_extractor):
    """Extract chr1:2-9 with anchor 3 and compare with the expected sequence."""
    region = Interval('chr1', 2, 9)
    extracted = single_seq_vcf_seq_extractor.extract(region, anchor=3)
    assert extracted == 'GCGAACG'
github kipoi / kipoiseq / tests / test_dataclasses.py View on Github external
interval.chrom = 'asd'
    with pytest.raises(AttributeError):
        interval.start = 10
    with pytest.raises(AttributeError):
        interval.end = 300
    with pytest.raises(AttributeError):
        interval.strand = '+'
    assert interval.strand == '-'

    # non-fixed arguments
    interval.name = 'asd'
    interval.score = 10

    assert interval.unstrand().strand == '.'

    assert interval == Interval.from_pybedtools(interval.to_pybedtools())
    assert isinstance(interval.to_pybedtools(), pybedtools.Interval)

    i2 = interval.shift(10, use_strand=False)

    # original unchanged
    assert interval.start == 10
    assert interval.end == 20

    assert i2.start == 20
    assert i2.end == 30

    i2 = interval.shift(10)  # use_strand = True by default
    assert i2.start == 0
    assert i2.end == 10

    assert not interval.shift(20, use_strand=True).is_valid()
github kipoi / kipoiseq / tests / extractors / test_vcf.py View on Github external
def test_MultiSampleVCF_fetch_variant(multi_sample_vcf):
    """Count the variants fetched per interval, with and without a sample."""

    def count(region, *sample):
        # Materialise the fetched variants so they can be counted.
        return len(list(multi_sample_vcf.fetch_variants(region, *sample)))

    # Interval with variants: the count depends on the sample passed.
    region = Interval('chr1', 3, 5)
    assert count(region) == 2
    assert count(region, 'NA00003') == 1
    assert count(region, 'NA00001') == 0

    # Interval without variants: nothing is returned either way.
    region = Interval('chr1', 7, 12)
    assert count(region) == 0
    assert count(region, 'NA00003') == 0
github kipoi / kipoiseq / tests / extractors / test_vcf.py View on Github external
def test_MultiSampleVCF_fetch_variant(multi_sample_vcf):
    """Count the variants fetched per interval, with and without a sample."""

    def count(region, *sample):
        # Materialise the fetched variants so they can be counted.
        return len(list(multi_sample_vcf.fetch_variants(region, *sample)))

    # Interval with variants: the count depends on the sample passed.
    region = Interval('chr1', 3, 5)
    assert count(region) == 2
    assert count(region, 'NA00003') == 1
    assert count(region, 'NA00001') == 0

    # Interval without variants: nothing is returned either way.
    region = Interval('chr1', 7, 12)
    assert count(region) == 0
    assert count(region, 'NA00003') == 0
github kipoi / kipoiseq / tests / extractors / test_vcf_query.py View on Github external
def variant_queryable():
    vcf = MultiSampleVCF(vcf_file)
    return VariantIntervalQueryable(vcf, [
        (
            [
                Variant('chr1', 12, 'A', 'T'),
                Variant('chr1', 18, 'A', 'C', filter='q10'),
            ],
            Interval('chr1', 10, 20)
        ),
        (
            [
                Variant('chr2', 120, 'AT', 'AAAT'),
            ],
            Interval('chr2', 110, 200)
        )
github kipoi / kipoiseq / tests / test_dataclasses.py View on Github external
def test_variant():
    """Check the attributes of a freshly constructed ``Variant``.

    Builds ``Variant("chr1", 10, 'C', 'T')`` and asserts the positional
    fields, the values seen for ``info``, ``qual`` and ``filter`` when they
    are not passed, that ``info`` can be written to, and that ``str()``
    returns a string.
    """
    v = Variant("chr1", 10, 'C', 'T')

    # start is pos - 1 for this variant
    # (presumably a 0-based start next to a 1-based pos -- TODO confirm)
    assert v.start == 9
    assert v.chrom == 'chr1'
    assert v.pos == 10
    assert v.ref == 'C'
    assert v.alt == 'T'
    # fields that were not passed to the constructor
    assert isinstance(v.info, dict)
    assert len(v.info) == 0
    assert v.qual == 0
    assert v.filter == 'PASS'
    # info can be modified in place
    v.info['test'] = 10
    assert v.info['test'] == 10
    assert isinstance(str(v), str)

    # make sure the original is left unchanged
    # NOTE(review): v2 is not used in the visible part of this test
    v2 = v.copy()
github kipoi / kipoiseq / tests / test_dataclasses.py View on Github external
with pytest.raises(AttributeError):
        v.chrom = 'asd'
    with pytest.raises(AttributeError):
        v.pos = 10
    with pytest.raises(AttributeError):
        v.ref = 'asd'
    with pytest.raises(AttributeError):
        v.alt = 'asd'

    # non-fixed arguments
    v.id = 'asd'
    v.qual = 10
    v.filter = 'asd'
    v.source = 2

    assert isinstance(Variant("chr1", '10', 'C', 'T').pos, int)

    # from cyvcf2
    vcf = cyvcf2.VCF('tests/data/test.vcf.gz')
    cv = list(vcf)[0]

    v2 = Variant.from_cyvcf(cv)
    assert isinstance(v2.source, cyvcf2.Variant)