Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_one_sites(self):
    """Run ``self.verify`` using only the first default inference site."""
    simulated_ts = msprime.simulate(
        15,
        mutation_rate=2,
        recombination_rate=2,
        random_seed=3,
    )
    samples = tsinfer.SampleData.from_tree_sequence(simulated_ts)
    inference_positions = get_default_inference_sites(samples)
    # Slice rather than index so that verify still receives a sequence.
    first_position_only = inference_positions[:1]
    self.verify(samples, first_position_only)
def verify(self, sample_data, position_subset):
    """Check that matching samples against site-subsetted ancestors agrees
    with subsetting the sites of the fully inferred tree sequence.

    :param sample_data: the sample data passed to ``tsinfer.infer``,
        ``tsinfer.generate_ancestors`` and ``tsinfer.match_ancestors``.
    :param position_subset: the positions handed to ``self.subset_sites``.
    """
    # Reference result: infer on everything, then subset the sites.
    expected_ts = self.subset_sites(tsinfer.infer(sample_data), position_subset)

    # Build the ancestors tree sequence, subset it to the same positions,
    # then minimise and simplify it before using it for sample matching.
    ancestors = tsinfer.generate_ancestors(sample_data)
    full_ancestors_ts = tsinfer.match_ancestors(sample_data, ancestors)
    reduced_ancestors_ts = tsinfer.minimise(
        self.subset_sites(full_ancestors_ts, position_subset)
    ).simplify()

    reduced_samples = tsinfer.SampleData.from_tree_sequence(expected_ts)
    matched_ts = tsinfer.match_samples(reduced_samples, reduced_ancestors_ts)

    genotypes_agree = np.array_equal(
        matched_ts.genotype_matrix(), expected_ts.genotype_matrix()
    )
    self.assertTrue(genotypes_agree)
def test_append_sites_incompatible_files(self):
    """Check that ``append_sites`` raises ``ValueError`` for unusable inputs.

    The example tree sequence is cut into three interval pieces, each
    converted to sample data.  ``append_sites`` is then called in four ways
    that are each expected to raise ``ValueError`` with a message matching
    the given pattern, plus one call that is expected to succeed.

    Uses ``assertRaisesRegex``: the older ``assertRaisesRegexp`` spelling is
    a deprecated alias that was removed from ``unittest`` in Python 3.12.
    """
    ts = get_example_individuals_ts_with_metadata(4, 2, 10)
    sd1 = tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([[0, 2]]))
    mid_ts = ts.keep_intervals([[2, 5]])
    sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts)
    sd3 = tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([[5, 10]]))

    # Fails if altered SD is not in write mode
    with self.assertRaisesRegex(ValueError, "build"):
        sd1.append_sites(sd2, sd3)

    sd = sd1.copy()  # put into write mode
    sd.append_sites(sd2, sd3)  # now works

    # Fails if added SDs are in write mode.  The copy is made outside the
    # context manager so that only append_sites itself is under test.
    sd2_copy = sd2.copy()
    with self.assertRaisesRegex(ValueError, "finalise"):
        sd.append_sites(sd2_copy, sd3)

    sd = sd1.copy()  # put into write mode

    # Wrong seq length
    sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts.rtrim())
    with self.assertRaisesRegex(ValueError, "length"):
        sd.append_sites(sd2, sd3)

    # Wrong num samples
    sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts.simplify(list(range(7))))
    with self.assertRaisesRegex(ValueError, "samples"):
        sd.append_sites(sd2, sd3)
def test_access_individuals(self):
    """Check that ``individuals()`` and ``individual(id)`` give equal objects.

    Each individual yielded by iteration must have an id equal to its
    position in the iteration and compare equal to the object returned by
    ``sd.individual`` for that id; after the fetched object's ``samples``
    attribute is replaced, the two must no longer compare equal.
    """
    ts = get_example_individuals_ts_with_metadata(5, 2, 10, 1)
    sd = tsinfer.SampleData.from_tree_sequence(ts)
    self.assertGreater(sd.num_individuals, 0)

    saw_metadata = False
    for index, ind in enumerate(sd.individuals()):
        if ind.metadata is not None:
            # Check that we do compare something sometimes
            saw_metadata = True
        self.assertEqual(index, ind.id)
        fetched = sd.individual(index)
        self.assertEqual(fetched, ind)
        # Changing an attribute on one object must break equality.
        fetched.samples = []
        self.assertNotEqual(fetched, ind)

    self.assertTrue(saw_metadata)
    # The last index seen shows that the loop covered every individual.
    self.assertEqual(index, sd.num_individuals - 1)
def verify(self, ts):
    """Check sample node ids of inferred tree sequences with and without
    simplification.

    :param ts: a tree sequence with more than two sites, converted to sample
        data and passed to ``tsinfer.infer``.
    """
    num_samples = ts.num_samples
    self.assertGreater(ts.num_sites, 2)
    sample_data = tsinfer.SampleData.from_tree_sequence(ts)

    # When simplify is true the samples should be zero to n.
    ts1 = tsinfer.infer(sample_data, simplify=True)
    expected_ids = list(range(num_samples))
    self.assertEqual(list(ts1.samples()), expected_ids)
    for tree in ts1.trees():
        leaf_count = len(list(tree.leaves()))
        self.assertEqual(tree.num_samples(), leaf_count)

    # When simplify is false and there is no path compression,
    # the samples should be the last n node ids: N - n up to N.
    ts2 = tsinfer.infer(sample_data, simplify=False, path_compression=False)
    last_node_ids = list(range(ts2.num_nodes - num_samples, ts2.num_nodes))
    self.assertEqual(list(ts2.samples()), last_node_ids)

    # Check that we're calling simplify with the correct arguments.
    # NOTE(review): ts2 and t1 are not used again in the visible part of this
    # method; the comparison they were built for appears to be missing.
    ts2 = tsinfer.infer(sample_data, simplify=False).simplify(keep_unary=True)
    t1 = ts1.dump_tables()
def test_inferred_no_simplify(self):
    """Run ``self.verify`` on a tree sequence inferred with ``simplify=False``."""
    simulated_ts = msprime.simulate(
        10,
        recombination_rate=2,
        mutation_rate=10,
        random_seed=2,
    )
    sample_data = tsinfer.SampleData.from_tree_sequence(simulated_ts)
    inferred_ts = tsinfer.infer(sample_data, simplify=False)
    self.verify(inferred_ts)
def test_different_ancestors_ts_match_samples(self):
    """``match_samples`` must raise ``ValueError`` when the ancestors tree
    sequence was built from sample data of a different simulation."""
    # Ancestors come from the simulation seeded with 1 ...
    first_sim = msprime.simulate(sample_size=6, random_seed=1, mutation_rate=6)
    first_samples = tsinfer.SampleData.from_tree_sequence(first_sim)
    first_ancestors = tsinfer.generate_ancestors(first_samples)
    ancestors_ts = tsinfer.match_ancestors(first_samples, first_ancestors)

    # ... while the samples to match come from the simulation seeded with 2.
    second_sim = msprime.simulate(sample_size=6, random_seed=2, mutation_rate=6)
    second_samples = tsinfer.SampleData.from_tree_sequence(second_sim)

    with self.assertRaises(ValueError):
        tsinfer.match_samples(second_samples, ancestors_ts)
def test_append_sites(self):
    """Check that appending the interval pieces reproduces the full data.

    The example tree sequence is split into three interval pieces; the
    second and third are appended to a copy of the first, and the finalised
    result is compared with sample data built from the whole tree sequence.
    """
    ts = get_example_individuals_ts_with_metadata(4, 2, 10)
    pieces = [
        tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([interval]))
        for interval in ([0, 2], [2, 5], [5, 10])
    ]
    head, *tail = pieces

    combined = head.copy()  # put into write mode
    combined.append_sites(*tail)
    combined.finalise()
    combined.assert_data_equal(tsinfer.SampleData.from_tree_sequence(ts))

    # Test that the full file passes though invisibly if no args given
    untouched = combined.copy()
    untouched.append_sites()
    untouched.finalise()
    untouched.assert_data_equal(tsinfer.SampleData.from_tree_sequence(ts))
def visualise_ancestors():
    """Simulate a small tree sequence, generate its ancestors and draw them
    with ``AncestorBuilderViz`` using the ``"ancestors_{}.svg"`` pattern."""
    simulated = msprime.simulate(
        10,
        mutation_rate=2,
        recombination_rate=2,
        random_seed=3,
    )
    # Singletons are stripped before the sample data is built.
    stripped = tsinfer.strip_singletons(simulated)
    samples = tsinfer.SampleData.from_tree_sequence(stripped)
    ancestors = tsinfer.generate_ancestors(samples)
    AncestorBuilderViz(samples, ancestors).draw(6, "ancestors_{}.svg")