How to use the biotite.sequence.NucleotideSequence class in biotite

To help you get started, we’ve selected a few biotite examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biotite-dev / biotite / tests / sequence / test_fasta.py View on Github external
fasta.set_sequences(file2, seq_dict)
    seq_dict2 = fasta.get_sequences(file2)
    assert seq_dict == seq_dict2
    
    file3 = fasta.FastaFile()
    fasta.set_sequence(file3, seq.NucleotideSequence("AACCTTGG"))
    assert file3["sequence"] == "AACCTTGG"
    
    path = os.path.join(data_dir("sequence"), "prot.fasta")
    file4 = fasta.FastaFile.read(path)
    assert seq.ProteinSequence("YAHGFRTGS") == fasta.get_sequence(file4)
    
    path = os.path.join(data_dir("sequence"), "invalid.fasta")
    file5 = fasta.FastaFile.read(path)
    with pytest.raises(ValueError):
        seq.NucleotideSequence(fasta.get_sequence(file5))
github biotite-dev / biotite / tests / sequence / test_sequence.py View on Github external
def test_encoding():
    """
    Check that a nucleotide string survives the round trip through
    `NucleotideSequence` and back to `str` unchanged.
    """
    original = "AATGCGTTA"
    round_tripped = str(seq.NucleotideSequence(original))
    assert original == round_tripped
github biotite-dev / biotite / tests / sequence / test_sequence.py View on Github external
def test_concatenation():
    """
    Check that adding two `NucleotideSequence` objects yields the
    concatenation of their strings, including operands that consist of
    the ambiguous symbol ``N`` on either side.
    """
    str1 = "AAGTTA"
    str2 = "CGA"
    str3 = "NNN"
    # (left operand, right operand) pairs, checked in this order
    operand_pairs = [(str1, str2), (str1, str3), (str3, str1)]
    for left, right in operand_pairs:
        concat_seq = (
            seq.NucleotideSequence(left) + seq.NucleotideSequence(right)
        )
        assert left + right == str(concat_seq)
github biotite-dev / biotite / tests / sequence / test_align.py View on Github external
def test_align_optimal_simple(local, term, gap_penalty,
                              input1, input2, expect):
    """
    Check `align_optimal()` against constructed test cases.

    Every returned alignment must have a string representation that is
    contained in `expect`, and re-scoring each alignment with
    `align.score()` must reproduce the score stored on the alignment.
    """
    first = seq.NucleotideSequence(input1)
    second = seq.NucleotideSequence(input2)
    subst_matrix = align.SubstitutionMatrix.std_nucleotide_matrix()

    # First pass: the alignments themselves
    results = align.align_optimal(
        first, second, subst_matrix,
        gap_penalty=gap_penalty,
        terminal_penalty=term,
        local=local,
    )
    for alignment in results:
        assert str(alignment) in expect

    # Second pass: the independent score function must agree
    for alignment in results:
        recomputed = align.score(
            alignment, subst_matrix,
            gap_penalty=gap_penalty,
            terminal_penalty=term,
        )
        assert recomputed == alignment.score
github biotite-dev / biotite / tests / sequence / test_sequence.py View on Github external
def test_access():
    """
    Check single-symbol indexing, iteration and slicing of a
    `NucleotideSequence` against the equivalent `str` operations.
    """
    text = "AATGCGTTA"
    dna = seq.NucleotideSequence(text)
    # Indexing yields the same symbol as the plain string
    assert text[2] == dna[2]
    # Iterating over the sequence yields its symbols in order
    assert text == "".join(symbol for symbol in dna)
    # Slicing yields a sub-sequence
    sliced = dna[3:-2]
    assert "GCGT" == str(sliced)
github biotite-dev / biotite / tests / sequence / test_align.py View on Github external
def test_align_ungapped():
    """
    Check `align_ungapped()` on two six-symbol sequences: the expected
    score is 3 and both sequences appear unshifted in the alignment.
    """
    first = seq.NucleotideSequence("ACCTGA")
    second = seq.NucleotideSequence("ACTGGT")
    subst_matrix = align.SubstitutionMatrix.std_nucleotide_matrix()
    alignment = align.align_ungapped(first, second, subst_matrix)
    assert alignment.score == 3
    assert str(alignment) == "ACCTGA\nACTGGT"
github biotite-dev / biotite / tests / sequence / test_seqtypes.py View on Github external
def test_translation_met_start():
    """
    Check the 'met_start' parameter of `translate()`: with ``AAA``
    registered as start codon, the first amino acid of every translated
    protein must be replaced by methionine.
    """
    table = seq.CodonTable.default_table().with_start_codons("AAA")
    nucleotides = seq.NucleotideSequence("GAAACTGAAATAAGAAC")
    # The second element of the returned pair is not needed here
    translated, _ = nucleotides.translate(codon_table=table, met_start=True)
    assert list(map(str, translated)) == ["MLK*", "M*"]
github biotite-dev / biotite / doc / examples / scripts / sequence / codon_usage.py View on Github external
# Map the amino acid to the codon with maximum frequency
    opt_codons[amino_acid_code] = best_codon_code

# Fetch the streptavidin protein sequence from Streptomyces avidinii
# NOTE(review): passing None as the second argument presumably makes
# fetch() return an in-memory file object instead of writing to disk
# - confirm against the entrez.fetch() documentation
fasta_file = fasta.FastaFile.read(
    entrez.fetch("P22629", None, "fasta", "protein", "fasta")
)
strep_prot_seq = fasta.get_sequence(fasta_file)
# Create a DNA sequence from the protein sequence
# using the optimal codons:
# Start from an empty sequence and fill in its symbol codes directly,
# by looking up the codon code stored in 'opt_codons' for each
# amino acid code of the protein and joining them into a single array
strep_dna_seq = seq.NucleotideSequence()
strep_dna_seq.code = np.concatenate(
    [opt_codons[amino_acid_code] for amino_acid_code in strep_prot_seq.code]
)
# Add stop codon
strep_dna_seq += seq.NucleotideSequence("TAA")
# Put the DNA sequence into a FASTA file
# ('fasta_file' is rebound to a new, empty file here;
# the downloaded protein file is not used any further)
fasta_file = fasta.FastaFile()
fasta_file["Codon optimized streptavidin"] = str(strep_dna_seq)
# Print the contents of the created FASTA file
print(fasta_file)
# In a real application it would be written onto the hard drive via
github biotite-dev / biotite / doc / tutorial / src / sequence.py View on Github external
print(cds_seq[:60], "...")

########################################################################
# Awesome.
# Now we can translate the sequence and compare it with the translation
# given by the CDS feature.
# But before we can do that, we have to prepare the data:
# The DNA sequence uses an ambiguous alphabet due to the nasty
# ``'M'`` at position 28 of the original sequence.
# Furthermore, we have to remove the stop symbol after translation,
# and we need to remove the whitespace characters in the translation
# given by the CDS feature.

# To make the alphabet unambiguous, we create a new NucleotideSequence
# containing only the CDS portion, which is unambiguous.
# Thus, the resulting NucleotideSequence has an unambiguous alphabet.
cds_seq = seq.NucleotideSequence(cds_seq)
# Now we can translate the unambiguous sequence.
# With 'complete=True' the result is used as a single protein sequence
# (it is sliced and stripped of stops below)
# NOTE(review): presumably the whole sequence is treated as one
# reading frame - confirm against the translate() documentation
prot_seq = cds_seq.translate(complete=True)
print(prot_seq[:60], "...")
print(
    "Are the translated sequences equal?",
    # Remove stops of our translation
    (str(prot_seq.remove_stops()) == 
    # Remove whitespace characters from translation given by CDS feature
    cds_feature.qual["translation"].replace(" ", ""))
)

########################################################################
# Phylogenetic and guide trees
# ----------------------------
# 
# .. currentmodule:: biotite.sequence.phylo
github biotite-dev / biotite / doc / examples / scripts / sequence / anderson_logo.py View on Github external
import numpy as np
import matplotlib.pyplot as plt
import biotite.sequence as seq
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics

# The list of Anderson promoters, one 35 nt sequence per promoter.
# Each promoter must be listed exactly once, since every entry
# contributes one row to the alignment built from this list.
# NOTE(review): the second entry is meant to be BBa_J23100
# - verify the sequences against the iGEM parts registry.
# Entries 5 and 14 are identical in the source data.
seqs = [seq.NucleotideSequence(promoter) for promoter in (
    "ttgacagctagctcagtcctaggtataatgctagc",
    "ttgacggctagctcagtcctaggtacagtgctagc",
    "tttacagctagctcagtcctaggtattatgctagc",
    "ttgacagctagctcagtcctaggtactgtgctagc",
    "ctgatagctagctcagtcctagggattatgctagc",
    "ttgacagctagctcagtcctaggtattgtgctagc",
    "tttacggctagctcagtcctaggtactatgctagc",
    "tttacggctagctcagtcctaggtatagtgctagc",
    "tttacggctagctcagccctaggtattatgctagc",
    "ctgacagctagctcagtcctaggtataatgctagc",
    "tttacagctagctcagtcctagggactgtgctagc",
    "tttacggctagctcagtcctaggtacaatgctagc",
    "ttgacggctagctcagtcctaggtatagtgctagc",
    "ctgatagctagctcagtcctagggattatgctagc",
    "ctgatggctagctcagtcctagggattatgctagc",
    "tttatggctagctcagtcctaggtacaatgctagc",
    "tttatagctagctcagcccttggtacaatgctagc",
    "ttgacagctagctcagtcctagggactatgctagc",
    "ttgacagctagctcagtcctagggattgtgctagc",
    "ttgacggctagctcagtcctaggtattgtgctagc",
)]
# Sequences do not need to be aligned
# -> Create alignment with trivial trace
# [[0 0 0 ...]
#  [1 1 1 ...]