How to use pyensembl - 10 common examples

To help you get started, we’ve selected a few pyensembl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openvax / varcode / test / test_vcf.py View on Github external
def test_pandas_and_pyvcf_implementations_equivalent():
    paths = [
        {'path': data_path("somatic_hg19_14muts.vcf")},
        {'path': data_path("somatic_hg19_14muts.space_in_sample_name.vcf")},
        {'path': "/" + data_path("somatic_hg19_14muts.vcf")},
        {'path': data_path("somatic_hg19_14muts.vcf.gz")},
        {'path': data_path("multiallelic.vcf")},
        {'path': data_path("mutect-example.vcf")},
        {'path': data_path("strelka-example.vcf")},
        {'path': data_path("mutect-example-headerless.vcf"),
            'genome': cached_release(75)},
    ]
    if RUN_TESTS_REQUIRING_INTERNET:
        paths.append({'path': VCF_EXTERNAL_URL})
        paths.append({'path': VCF_EXTERNAL_URL + ".gz"})

    def do_test(kwargs):
        vcf_pandas = load_vcf_fast(**kwargs)
        vcf_pyvcf = load_vcf(**kwargs)
        eq_(vcf_pandas, vcf_pyvcf)
        eq_(len(vcf_pandas), len(vcf_pyvcf))
        eq_(vcf_pandas.elements, vcf_pyvcf.elements)
        eq_(vcf_pandas.metadata, vcf_pyvcf.metadata)
        assert len(vcf_pandas) > 1
        assert len(vcf_pyvcf) > 1

    for kwargs in paths:
github openvax / pyensembl / test / test_release_versions.py View on Github external
def test_version_too_old_47():
    """Construct an EnsemblRelease for release number 47.

    NOTE(review): the test name implies release 47 should be rejected as too
    old, but no expected-exception check is visible in this block -- confirm
    that one is applied elsewhere (for example by a decorator).
    """
    release_number = 47
    EnsemblRelease(release_number)
github openvax / pyensembl / test / test_attributes.py View on Github external
from pyensembl import EnsemblRelease

# Module-level Ensembl release 75 object, used by test_gene_ids below.
ensembl75 = EnsemblRelease(75)


def test_gene_ids():
    """Check that every gene_id on contig "1" is exactly 15 characters long."""
    # Restrict the dataframe to chromosome 1 to keep the test fast.
    chr1_rows = ensembl75.dataframe(contig="1")
    assert 'gene_id' in chr1_rows
    # Ensembl gene IDs look like ENSG00000223972, which is 15 characters.
    id_lengths = chr1_rows['gene_id'].str.len()
    # On failure, report the rows whose gene_id has an unexpected length.
    assert (id_lengths == 15).all(), chr1_rows[id_lengths != 15]
github openvax / pyensembl / test / test_release_versions.py View on Github external
def test_version_is_not_numeric():
    """Construct an EnsemblRelease from a non-numeric string.

    NOTE(review): presumably this is expected to raise, but no
    expected-exception check is visible in this block -- confirm that one is
    applied elsewhere (for example by a decorator).
    """
    non_numeric_release = "wuzzle"
    EnsemblRelease(non_numeric_release)
github openvax / pyensembl / test / test_release_versions.py View on Github external
def test_version_is_none():
    """Construct an EnsemblRelease with None as the release.

    NOTE(review): presumably this is expected to raise, but no
    expected-exception check is visible in this block -- confirm that one is
    applied elsewhere (for example by a decorator).
    """
    missing_release = None
    EnsemblRelease(missing_release)
github openvax / pyensembl / test / test_locus.py View on Github external
assert normalize_chromosome("chrmt") == "chrMT"

    with assert_raises(TypeError):
        normalize_chromosome({"a": "b"})

    with assert_raises(TypeError):
        normalize_chromosome([])

    with assert_raises(TypeError):
        normalize_chromosome(None)

    with assert_raises(ValueError):
        normalize_chromosome("")

    with assert_raises(ValueError):
        normalize_chromosome(0)
github openvax / pyensembl / test / test_locus.py View on Github external
def test_normalize_chromosome():
    """Check normalize_chromosome on valid names and on wrongly typed input."""
    # (input, expected output) pairs, checked in this order.
    expected_by_input = [
        # sex chromosome, with and without the "chr" prefix
        ("X", "X"),
        ("chrX", "chrX"),
        ("x", "X"),
        ("chrx", "chrX"),
        # numeric chromosome given as int or str
        (1, "1"),
        ("1", "1"),
        ("chr1", "chr1"),
        # mitochondrial names in their M / MT spellings
        ("chrM", "chrM"),
        ("chrMT", "chrMT"),
        ("M", "M"),
        ("MT", "MT"),
        ("m", "M"),
        ("chrm", "chrM"),
        ("mt", "MT"),
        ("chrmt", "chrMT"),
    ]
    for raw_name, normalized_name in expected_by_input:
        assert normalize_chromosome(raw_name) == normalized_name

    # A dict and a list must each be rejected with TypeError.
    for invalid_name in ({"a": "b"}, []):
        with assert_raises(TypeError):
            normalize_chromosome(invalid_name)
github openvax / isovar / test / test_translation_helpers.py View on Github external
def test_TP53_translation_from_cdna():
    """Translating TP53-001's coding sequence should give its protein sequence."""
    # Use the first transcript returned for the name "TP53-001".
    transcript = ensembl_grch38.transcripts_by_name("TP53-001")[0]
    translated = translate_cdna(
        transcript.coding_sequence,
        first_codon_is_start=True)
    eq_(translated, transcript.protein_sequence)
github openvax / isovar / test / test_reference_sequence_key.py View on Github external
def test_sequence_key_for_variant_on_transcript_deletion_reverse_strand():
    """Check the ReferenceSequenceKey for a deletion of TP53-001's start codon.

    Builds a key from the variant and transcript with 10 nucleotides of
    context and compares it against the expected reverse-strand key.
    """
    # delete start codon of TP53-001, which in reverse complement means
    # deleting the sequence "CAT"
    tp53_deletion = Variant(
        "17", 7676592, "CAT", "", ensembl_grch38)
    tp53_001 = ensembl_grch38.transcripts_by_name("TP53-001")[0]
    # Sequence of TP53 around start codon with 10 context nucleotides:
    # In [51]: t.sequence[190-10:190+13]
    # Out[51]: 'GGTCACTGCC_ATG_GAGGAGCCGC'
    eq_(tp53_001.sequence[190 - 10:190 + 13], "GGTCACTGCCATGGAGGAGCCGC")

    # get the 10 nucleotides before the variant and 10 nucleotides after
    sequence_key = ReferenceSequenceKey.from_variant_and_transcript(
        variant=tp53_deletion,
        transcript=tp53_001,
        context_size=10)

    expected_sequence_key = ReferenceSequenceKey(
        strand="-",
        sequence_before_variant_locus="GGTCACTGCC",
        sequence_at_variant_locus="ATG",
        sequence_after_variant_locus="GAGGAGCCGC")
    # Without this comparison the test could not fail on a wrong key.
    eq_(sequence_key, expected_sequence_key)
github openvax / isovar / test / test_reference_coding_sequence_key.py View on Github external
def test_reference_coding_sequence_key_insertion_inside_start_codon():
    """An insertion inside TP53-001's start codon should yield no key."""
    # Insert nucleotide "C" in the middle of the TP53-001 start codon; in the
    # reverse complement this is written as 'T'>'TG'.
    start_codon_insertion = Variant(
        "17", 7676592, "T", "TG", ensembl_grch38)
    transcript = ensembl_grch38.transcripts_by_name("TP53-001")[0]

    # Only a single nucleotide of context is requested.
    coding_key = ReferenceCodingSequenceKey.from_variant_and_transcript(
        variant=start_codon_insertion,
        transcript=transcript,
        context_size=1)
    assert coding_key is None, "Expected result to be None when variant affects start codon"