How to use the biotite.sequence.ProteinSequence function in biotite

To help you get started, we’ve selected a few biotite examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biotite-dev / biotite / tests / application / test_msa.py View on Github external
def sequences():
    return [seq.ProteinSequence(string) for string in [
        "BIQTITE",
        "TITANITE",
        "BISMITE",
        "IQLITE"
]]
github biotite-dev / biotite / doc / examples / scripts / sequence / color_schemes_protein.py View on Github external
# (the same as in the bacterial luciferase example)
query =   entrez.SimpleQuery("luxA", "Gene Name") \
        & entrez.SimpleQuery("srcdb_swiss-prot", "Properties")
uids = entrez.search(query, db_name="protein")
fasta_file = fasta.FastaFile.read(entrez.fetch_single_file(
    uids, None, db_name="protein", ret_type="fasta"
))
sequences = [seq.ProteinSequence(seq_str) for seq_str in fasta_file.values()]
matrix = align.SubstitutionMatrix.std_protein_matrix()
alignment, order, _, _ = align.align_multiple(sequences, matrix)
# Order alignment according to the guide tree
alignment = alignment[:, order]
alignment = alignment[220:300]

# Get color scheme names
alphabet = seq.ProteinSequence.alphabet
schemes = [
    "rainbow", "clustalx",
    "flower", "blossom", "spring", "wither", "autumn", "sunset", "ocean",
    "zappo", "taylor", "buried", "hydrophobicity",
    "prophelix", "propstrand", "propturn"
]
count = len(schemes)
# Assert that this example displays all available amino acid color schemes
all_schemes = graphics.list_color_scheme_names(alphabet)
assert set(schemes) == set(all_schemes)


# Visualize each scheme using the example alignment
fig = plt.figure(figsize=(8.0, count*2.0))
gridspec = GridSpec(2, count)
for i, name in enumerate(schemes):
github biotite-dev / biotite / doc / examples / scripts / sequence / avidin_alignment.py View on Github external
import biotite.sequence as seq
import biotite.sequence.align as align
import biotite.sequence.io.fasta as fasta
import biotite.database.entrez as entrez
import biotite.sequence.graphics as graphics

# Download and parse protein sequences of avidin and streptavidin
file_name = entrez.fetch_single_file(["CAC34569", "ACL82594"],
                                     biotite.temp_file("sequences.fasta"),
                                     "protein", "fasta")
file = fasta.FastaFile.read(file_name)
for name, sequence in file.items():
    if "CAC34569" in name:
        avidin_seq = seq.ProteinSequence(sequence)
    elif "ACL82594" in name:
        streptavidin_seq = seq.ProteinSequence(sequence)
# Get BLOSUM62 matrix
matrix = align.SubstitutionMatrix.std_protein_matrix()
# Perform pairwise sequence alignment with affine gap penalty
# Terminal gaps are not penalized
alignments = align.align_optimal(avidin_seq, streptavidin_seq, matrix,
                                 gap_penalty=(-10, -1), terminal_penalty=False)
# Draw first and only alignment
# The color intensity indicates the similiarity
fig = plt.figure(figsize=(8.0, 2.5))
ax = fig.add_subplot(111)
graphics.plot_alignment_similarity_based(
    ax, alignments[0], matrix=matrix, labels=["Avidin", "Streptavidin"],
    show_numbers=True, show_line_position=True
)
fig.tight_layout()
github biotite-dev / biotite / doc / examples / scripts / sequence / avidin_alignment.py View on Github external
import matplotlib.pyplot as plt
import biotite
import biotite.sequence as seq
import biotite.sequence.align as align
import biotite.sequence.io.fasta as fasta
import biotite.database.entrez as entrez
import biotite.sequence.graphics as graphics

# Download and parse protein sequences of avidin and streptavidin
file_name = entrez.fetch_single_file(["CAC34569", "ACL82594"],
                                     biotite.temp_file("sequences.fasta"),
                                     "protein", "fasta")
file = fasta.FastaFile.read(file_name)
for name, sequence in file.items():
    if "CAC34569" in name:
        avidin_seq = seq.ProteinSequence(sequence)
    elif "ACL82594" in name:
        streptavidin_seq = seq.ProteinSequence(sequence)
# Get BLOSUM62 matrix
matrix = align.SubstitutionMatrix.std_protein_matrix()
# Perform pairwise sequence alignment with affine gap penalty
# Terminal gaps are not penalized
alignments = align.align_optimal(avidin_seq, streptavidin_seq, matrix,
                                 gap_penalty=(-10, -1), terminal_penalty=False)
# Draw first and only alignment
# The color intensity indicates the similiarity
fig = plt.figure(figsize=(8.0, 2.5))
ax = fig.add_subplot(111)
graphics.plot_alignment_similarity_based(
    ax, alignments[0], matrix=matrix, labels=["Avidin", "Streptavidin"],
    show_numbers=True, show_line_position=True
)
github biotite-dev / biotite / doc / examples / scripts / sequence / thca_synthase_polymorphism.py View on Github external
for gb_file in multi_file:
    annotation = gb.get_annotation(gb_file)
    
    # Find ID of strain in 'source' feature
    strain = None
    for feature in annotation:
        if feature.key == "source":
            strain = int(feature.qual["strain"])
    assert strain is not None
    
    # Find corresponding protein sequence in 'CDS' feature
    sequence = None
    for feature in annotation:
        if feature.key == "CDS":
            sequence = seq.ProteinSequence(
                # Remove whitespace in sequence
                # resulting from line breaks
                feature.qual["translation"].replace(" ", "")
            )
    assert sequence is not None

    sequences[strain] = sequence


# None of the THCA synthase variants have an insertion or deletion
# -> each one should have the same sequence length
seq_len = len(list(sequences.values())[0])
for sequence in sequences.values():
    assert len(sequence) == seq_len

# Create consensus sequences for the drug-type and fiber-type cannabis
github biotite-dev / biotite / doc / tutorial / src / application.py View on Github external
# ----------------------------
#
# .. currentmodule:: biotite.application.muscle
#
# For *multiple sequence alignments* (MSAs) :mod:`biotite.application`
# offers several interfaces to MSA software.
# For our example we choose the software MUSCLE:
# The subpackage :mod:`biotite.application.muscle` contains the class
# :class:`MuscleApp` that does the job.
# You simply input the sequences you want to have aligned, run the
# application and get the resulting :class:`Alignment` object.

import biotite.application.muscle as muscle
import biotite.sequence as seq

seq1 = seq.ProteinSequence("BIQTITE")
seq2 = seq.ProteinSequence("TITANITE")
seq3 = seq.ProteinSequence("BISMITE")
seq4 = seq.ProteinSequence("IQLITE")
app = muscle.MuscleApp([seq1, seq2, seq3, seq4])
app.start()
app.join()
alignment = app.get_alignment()
print(alignment)

########################################################################
# In most MSA software even more information than the mere alignment can
# be extracted.
# For instance the guide tree, that was used for the alignment, can be
# obtained from the MUSCLE output.

import matplotlib.pyplot as plt
github biotite-dev / biotite / doc / tutorial / src / application.py View on Github external
# :class:`Application` classes in depth.
# 
# Finding homologous sequences with BLAST
# ---------------------------------------
#
# .. currentmodule:: biotite.application.blast
#
# the :mod:`biotite.application.blast` subpackage provides an
# interface to NCBI BLAST: the :class:`BlastWebApp` class.
# Let's dive directly into the code, we try to find
# homologous sequences to the miniprotein *TC5b*:

import biotite.application.blast as blast
import biotite.sequence as seq

tc5b_seq = seq.ProteinSequence("NLYIQWLKDGGPSSGRPPPS")
app = blast.BlastWebApp("blastp", tc5b_seq)
app.start()
app.join()
alignments = app.get_alignments()
best_ali = alignments[0]
print(best_ali)
print()
print("HSP position in query: ", best_ali.query_interval)
print("HSP position in hit: ", best_ali.hit_interval)
print("Score: ", best_ali.score)
print("E-value: ", best_ali.e_value)
print("Hit UID: ", best_ali.hit_id)
print("Hit name: ", best_ali.hit_definition)

########################################################################
# This was too simple for BLAST:
github biotite-dev / biotite / doc / examples / scripts / sequence / blosum_dendrogram.py View on Github external
# Finally the obtained (phylogenetic) tree is plotted as dendrogram.
def get_distance(similarities, i, j):
    s_max = (similarities[i,i] + similarities[j,j]) / 2
    return s_max - similarities[i,j]

distances = np.zeros(similarities.shape)
for i in range(distances.shape[0]):
    for j in range(distances.shape[1]):
        distances[i,j] = get_distance(similarities, i, j)

tree = phylo.upgma(distances)

fig = plt.figure(figsize=(8.0, 5.0))
ax = fig.add_subplot(111)
# Use the 3-letter amino acid code aa label
labels = [seq.ProteinSequence.convert_letter_1to3(letter).capitalize()
          for letter in matrix.get_alphabet1()]
graphics.plot_dendrogram(
    ax, tree, orientation="top", labels=labels
)
ax.set_ylabel("Distance")
# Add grid for clearer distance perception
ax.yaxis.grid(color="lightgray")
plt.show()
github biotite-dev / biotite / doc / tutorial / src / application.py View on Github external
#
# .. currentmodule:: biotite.application.muscle
#
# For *multiple sequence alignments* (MSAs) :mod:`biotite.application`
# offers several interfaces to MSA software.
# For our example we choose the software MUSCLE:
# The subpackage :mod:`biotite.application.muscle` contains the class
# :class:`MuscleApp` that does the job.
# You simply input the sequences you want to have aligned, run the
# application and get the resulting :class:`Alignment` object.

import biotite.application.muscle as muscle
import biotite.sequence as seq

seq1 = seq.ProteinSequence("BIQTITE")
seq2 = seq.ProteinSequence("TITANITE")
seq3 = seq.ProteinSequence("BISMITE")
seq4 = seq.ProteinSequence("IQLITE")
app = muscle.MuscleApp([seq1, seq2, seq3, seq4])
app.start()
app.join()
alignment = app.get_alignment()
print(alignment)

########################################################################
# In most MSA software even more information than the mere alignment can
# be extracted.
# For instance the guide tree, that was used for the alignment, can be
# obtained from the MUSCLE output.

import matplotlib.pyplot as plt
import biotite.sequence.graphics as graphics