Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
align.align_to_ref(),
align.progressive_align(model="GY94"),
sample.fixed_length(100),
sample.min_length(100),
io.write_seqs(os.getcwd()),
sample.omit_bad_seqs(),
sample.omit_degenerates(),
sample.take_codon_positions(1),
sample.take_named_seqs(),
sample.trim_stop_codons(gc=1),
]
return applications
class FastSlowDistTests(TestCase):
    """Construction checks for the ``fast_slow_dist`` distance app."""

    # unaligned sequence collections built from the module-level fixtures
    seqs1 = make_unaligned_seqs(_seqs1, moltype=DNA)
    seqs2 = make_unaligned_seqs(_seqs2, moltype=DNA)
    seqs3 = make_unaligned_seqs(_seqs3, moltype=DNA)
    seqs4 = make_unaligned_seqs(_seqs4, moltype=DNA)
    seqs5 = make_unaligned_seqs(_seqs5, moltype=PROTEIN)

    def test_init(self):
        """fast_slow_dist accepts a fast calculator name or a distance name"""
        # only a fast calculator requested: no substitution model is set
        app = dist_app.fast_slow_dist(fast_calc="hamming", moltype="dna")
        self.assertIsInstance(app.fast_calc, HammingPair)
        self.assertIsNone(app._sm)

        # a distance name sets both the pairwise calculator and the model
        app = dist_app.fast_slow_dist(distance="TN93")
        self.assertIsInstance(app.fast_calc, TN93Pair)
        self.assertEqual(app._sm.name, "TN93")

        # construction with "GTR" is exercised; no assertion follows it here
        app = dist_app.fast_slow_dist(distance="GTR")
_codon_models = [
"CNFGTR",
"CNFHKY",
"MG94HKY",
"MG94GTR",
"GY94",
"H04G",
"H04GK",
"H04GGK",
"GNC",
]
class RefalignmentTests(TestCase):
    """Tests for aligning a sequence collection to a named reference."""

    seqs = make_unaligned_seqs(_seqs, moltype=DNA)
    treestring = "(Bandicoot:0.4,FlyingFox:0.05,(Rhesus:0.06,Human:0.0):0.04);"

    def test_align_to_ref(self):
        """aligning against the Human sequence gives the expected gapped rows"""
        expected = dict(
            Bandicoot="---NACTCATTAATGCTTGAAACCAGCAGTTTATTGTCCAAC",
            FlyingFox="GCCAGCTCTTTACAGCATGAGAACAG---TTTATTATACACT",
            Human="GCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT",
            Rhesus="GCCAGCTCATTACAGCATGAGAAC---AGTTTGTTACTCACT",
        )
        app = align_app.align_to_ref(ref_seq="Human")
        result = app(self.seqs)
        self.assertEqual(result.to_dict(), expected)

    def test_align_to_ref_generic_moltype(self):
        """tests when the moltype is generic"""
        # NOTE(review): only the docstring is present here, so this test
        # makes no assertions as written - confirm against the full file.
def test_general_is_not_stationary(self):
    """The "general" model must not be stationary on any edge.

    For each edge obtained by iterating ``self.tree``, the motif
    probabilities multiplied by that edge's substitution probability
    matrix must differ from the motif probabilities themselves.
    """
    gen_lf = self.make_cached("general", max_evaluations=5)
    motif_probs = gen_lf.get_motif_probs()
    # order the probabilities to match the DNA alphabet
    mprobs = array([motif_probs[nuc] for nuc in DNA.alphabet])
    for edge in self.tree:
        psub = gen_lf.get_psub_for_edge(edge.name)
        pi = dot(mprobs, psub.array)
        # The equality check is expected to FAIL. Previously the
        # AssertionError was caught and discarded, which let the test
        # pass whether or not the two distributions matched.
        with self.assertRaises(AssertionError):
            self.assertFloatEqual(mprobs, pi)
def test_score_seq_obj(self):
    """scoring a sequence object yields the expected values"""
    from cogent3 import DNA

    motifs = "ACTG"
    weights = [
        [0.1, 0.3, 0.5, 0.1],
        [0.25, 0.25, 0.25, 0.25],
        [0.05, 0.8, 0.05, 0.1],
        [0.7, 0.1, 0.1, 0.1],
        [0.6, 0.15, 0.05, 0.2],
    ]
    pssm = PSSM(weights, motifs)

    # spell the query out from indices into the motif order
    letters = "".join(motifs[index] for index in [3, 1, 2, 0, 2, 2, 3])
    query = DNA.make_seq(letters)

    assert_allclose(
        pssm.score_seq(query), [-4.481, -5.703, -2.966], atol=1e-3
    )
def setUp(self):
    """Create the gapped sequences, maps, alignment and cigar fixtures."""
    self.cigar_text = "3D2M3D6MDM2D3MD"
    self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]

    self.aln_seq = DNA.make_seq("---AA---GCTTAG-A--CCT-")
    self.aln_seq1 = DNA.make_seq("CCAAAAAA---TAGT-GGC--G")

    # parse_out_gaps gives back a (map, sequence) pair for each input
    self.map, self.seq = self.aln_seq.parse_out_gaps()
    self.map1, self.seq1 = self.aln_seq1.parse_out_gaps()

    # the same two names key the alignment, the cigars and the raw strings
    names = ("FAKE01", "FAKE02")
    self.aln = make_aligned_seqs(
        dict(zip(names, (self.aln_seq, self.aln_seq1))), array_align=False
    )
    self.cigars = dict(zip(names, (self.cigar_text, map_to_cigar(self.map1))))
    self.seqs = dict(zip(names, (str(self.seq), str(self.seq1))))
# as_indices requires an alphabet
get_fourfold_degenerate_sets(get_code(1), as_indices=True)
expect = set()
for di in "GC", "GG", "CT", "CC", "TC", "CG", "AC", "GT":
codons = list(
map(
lambda x: tuple(DNA.alphabet.to_indices(x)),
[di + n for n in "ACGT"],
)
)
expect.update([frozenset(codons)])
for i in range(1, 3):
got = get_fourfold_degenerate_sets(
get_code(i), alphabet=DNA.alphabet, as_indices=True
)
self.assertEqual(got, expect)
def test_maps_on_maps(self):
seq = DNA.make_seq("ATCGATCGAT" * 5, name="base")
feat1 = annotate(seq, 10, 20, "fake")
feat2 = annotate(feat1, 3, 5, "fake2")
feat3 = annotate(seq, 1, 3, "left")
seq2 = seq[5:]
self.assertEqual(
structure(seq),
(
"seq",
50,
[("fake", "[10:20]/50", [("fake2", "[3:5]/10")]), ("left", "[1:3]/50")],
),
)
self.assertEqual(
structure(seq2),
("seq", 45, [("fake", "[5:15]/45", [("fake2", "[3:5]/10")])]),
Q[coord] *= val
row_sum = Q.sum(axis=1)
scale = 1 / (0.25 * row_sum).sum()
for i in range(4):
Q[i, i] -= row_sum[i]
Q *= scale
return expm(Q)(length)
class NewQ(TestCase):
    """Shared fixtures: a two-sequence alignment, its tree and root probabilities."""

    # both rows carry the same sequence text
    aln = make_aligned_seqs(
        data={
            "seq1": "TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT",
            "seq2": "TGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT",
        },
        moltype=DNA,
    )
    tree = make_tree(tip_names=["seq1", "seq2"])

    # equal nucleotide probabilities, and the root probabilities derived from them
    symm_nuc_probs = dict(A=0.25, T=0.25, C=0.25, G=0.25)
    symm_root_probs = _dinuc_root_probs(symm_nuc_probs)

    # unequal nucleotide probabilities (C and G favoured)
    asymm_nuc_probs = dict(A=0.1, T=0.1, C=0.4, G=0.4)
    asymm_root_probs = _dinuc_root_probs(asymm_nuc_probs)

    # the two nucleotide distributions passed together
    posn_root_probs = _dinuc_root_probs(symm_nuc_probs, asymm_nuc_probs)

    # Each dinucleotide n1+n2 gets the probability of n1 scaled by 0.7 when
    # both positions hold the same nucleotide and by 0.1 otherwise.
    # Only the first iterable of a comprehension is evaluated in the class
    # scope, so asymm_nuc_probs must stay in the outermost ``for`` clause.
    cond_root_probs = {
        n1 + n2: p1 * (0.7 if n1 == n2 else 0.1)
        for n1, p1 in asymm_nuc_probs.items()
        for n2 in "ATCG"
    }
def test_assemble_seq(self):
    """fragments are placed at their positions with N filling the rest"""
    frags = ["AAAAA", "CCCCC", "GGG"]

    # (expected sequence text, start, end, fragment positions)
    cases = [
        # padding on both sides
        ("NAAAAANNCCCCCNNGGGNNN", 10, 31, [(11, 16), (18, 23), (25, 28)]),
        ("NAAAAANNCCCCCNNGGGNNN", 0, 21, [(1, 6), (8, 13), (15, 18)]),
        # start matches first frag start
        ("AAAAANNCCCCCNNGGGNNN", 0, 20, [(0, 5), (7, 12), (14, 17)]),
        # end matches last frag end
        ("NAAAAANNCCCCCNNGGG", 10, 28, [(11, 16), (18, 23), (25, 28)]),
        # both start and end matched
        ("AAAAANNCCCCCNNGGG", 10, 27, [(10, 15), (17, 22), (24, 27)]),
    ]
    for text, start, end, positions in cases:
        expect = DNA.make_sequence(text)
        self.assertEqual(_assemble_seq(frags, start, end, positions), expect)

    # one frag
    whole = ''.join(frags)
    expect = DNA.make_sequence(whole)
    self.assertEqual(_assemble_seq([whole], 10, 23, [(10, 23)]), expect)
def aligned_from_cigar(cigar_text, seq, moltype=DNA):
    """Return an Aligned sequence from a cigar string, sequence and moltype.

    Parameters
    ----------
    cigar_text
        cigar string, converted to a map with ``cigar_to_map``
    seq
        a sequence object, or a ``str`` which is first converted using
        ``moltype.make_seq``
    moltype
        used only when *seq* is a ``str``; defaults to ``DNA``

    Returns
    -------
    The result of ``seq.gapped_by_map`` applied to the map built from
    *cigar_text*.
    """
    if isinstance(seq, str):
        seq = moltype.make_seq(seq)
    # named gap_map (not ``map``) so the builtin is not shadowed
    gap_map = cigar_to_map(cigar_text)
    return seq.gapped_by_map(gap_map)