How to use the tsinfer.SampleData.from_tree_sequence function in tsinfer

To help you get started, we’ve selected a few tsinfer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def test_one_sites(self):
        """Run ``self.verify`` on a simulated dataset restricted to one site.

        Only the first position returned by ``get_default_inference_sites``
        is passed on as the position subset.
        """
        simulated = msprime.simulate(
            15, mutation_rate=2, recombination_rate=2, random_seed=3
        )
        samples = tsinfer.SampleData.from_tree_sequence(simulated)
        # Slice (rather than index) so a sequence, not a scalar, is passed on.
        first_site_only = get_default_inference_sites(samples)[:1]
        self.verify(samples, first_site_only)
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def verify(self, sample_data, position_subset):
        """Check sample matching against ancestors reduced to a subset of sites.

        The expected result is the full inference of ``sample_data`` passed
        through ``self.subset_sites`` with ``position_subset``. The ancestors
        tree sequence is reduced with the same helper, minimised and
        simplified; samples built from the expected tree sequence are then
        matched against it. The two genotype matrices must be identical.
        """
        # Reference result: infer everything, then reduce to the chosen sites.
        inferred = tsinfer.infer(sample_data)
        expected_ts = self.subset_sites(inferred, position_subset)

        # Build the ancestors on the full data and reduce them the same way.
        ancestors = tsinfer.generate_ancestors(sample_data)
        matched_ancestors = tsinfer.match_ancestors(sample_data, ancestors)
        reduced_ancestors = self.subset_sites(matched_ancestors, position_subset)
        reduced_ancestors = tsinfer.minimise(reduced_ancestors).simplify()

        # Match samples derived from the reference against the reduced ancestors.
        reduced_samples = tsinfer.SampleData.from_tree_sequence(expected_ts)
        result_ts = tsinfer.match_samples(reduced_samples, reduced_ancestors)
        genotypes_agree = np.array_equal(
            result_ts.genotype_matrix(), expected_ts.genotype_matrix()
        )
        self.assertTrue(genotypes_agree)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_append_sites_incompatible_files(self):
        """Check that ``append_sites`` rejects incompatible SampleData inputs.

        Each failure is expected to be a ``ValueError`` whose message matches
        the given pattern: "build" when the target is not in write mode,
        "finalise" when an appended file is still in write mode, "length" for
        a differing sequence length and "samples" for a differing sample
        count.

        Uses ``assertRaisesRegex``; the older ``assertRaisesRegexp`` alias is
        deprecated and was removed from ``unittest`` in Python 3.12.
        """
        ts = get_example_individuals_ts_with_metadata(4, 2, 10)
        sd1 = tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([[0, 2]]))
        mid_ts = ts.keep_intervals([[2, 5]])
        sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts)
        sd3 = tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([[5, 10]]))

        # Fails if altered SD is not in write mode
        with self.assertRaisesRegex(ValueError, "build"):
            sd1.append_sites(sd2, sd3)

        # Fails if added SDs are in write mode
        sd = sd1.copy()  # put into write mode
        sd.append_sites(sd2, sd3)  # now works
        # Make the copy outside the assertion so that only an error raised by
        # append_sites itself can satisfy the check.
        sd2_in_write_mode = sd2.copy()
        with self.assertRaisesRegex(ValueError, "finalise"):
            sd.append_sites(sd2_in_write_mode, sd3)
        sd = sd1.copy()  # fresh write-mode copy for the remaining checks

        # Wrong seq length
        sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts.rtrim())
        with self.assertRaisesRegex(ValueError, "length"):
            sd.append_sites(sd2, sd3)

        # Wrong num samples
        sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts.simplify(list(range(7))))
        with self.assertRaisesRegex(ValueError, "samples"):
            sd.append_sites(sd2, sd3)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_access_individuals(self):
        """Check individual access by iteration and by ID on a SampleData file.

        Each individual yielded by ``individuals()`` must carry its position
        as ``id`` and compare equal to ``individual(id)``; changing the
        ``samples`` of the fetched object must make the two compare unequal.
        """
        source_ts = get_example_individuals_ts_with_metadata(5, 2, 10, 1)
        samples = tsinfer.SampleData.from_tree_sequence(source_ts)
        self.assertGreater(samples.num_individuals, 0)

        # Track whether any individual had metadata, so that we know the
        # comparisons below exercised metadata at least once.
        metadata_seen = False
        for index, ind in enumerate(samples.individuals()):
            metadata_seen |= ind.metadata is not None
            self.assertEqual(index, ind.id)
            fetched = samples.individual(index)
            self.assertEqual(fetched, ind)
            fetched.samples = []
            self.assertNotEqual(fetched, ind)
        self.assertTrue(metadata_seen)
        # The last index seen must correspond to the final individual.
        self.assertEqual(index, samples.num_individuals - 1)
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def verify(self, ts):
        """Check the sample node IDs produced by ``tsinfer.infer`` for ``ts``.

        Inference is run on sample data built from ``ts`` both with and
        without simplification, and the IDs returned by ``samples()`` are
        compared with the expected ranges.
        """
        n = ts.num_samples
        self.assertGreater(ts.num_sites, 2)
        sd = tsinfer.SampleData.from_tree_sequence(ts)
        ts1 = tsinfer.infer(sd, simplify=True)
        # When simplify is true the samples should be nodes 0 to n - 1.
        self.assertEqual(list(ts1.samples()), list(range(n)))
        for tree in ts1.trees():
            self.assertEqual(tree.num_samples(), len(list(tree.leaves())))

        # When simplify is false and there is no path compression,
        # the samples should be the last n nodes: N - n up to N - 1,
        # where N is the total number of nodes.
        ts2 = tsinfer.infer(sd, simplify=False, path_compression=False)
        self.assertEqual(
            list(ts2.samples()), list(range(ts2.num_nodes - n, ts2.num_nodes))
        )

        # Check that we're calling simplify with the correct arguments.
        ts2 = tsinfer.infer(sd, simplify=False).simplify(keep_unary=True)
        # NOTE(review): t1 is not used in the visible code; this excerpt
        # appears to be cut off before the comparison it sets up.
        t1 = ts1.dump_tables()
github tskit-dev / tsinfer / tests / test_evaluation.py View on Github external
def test_inferred_no_simplify(self):
        """Run ``self.verify`` on a tree sequence inferred with ``simplify=False``."""
        simulated = msprime.simulate(
            10, recombination_rate=2, mutation_rate=10, random_seed=2
        )
        sample_data = tsinfer.SampleData.from_tree_sequence(simulated)
        self.verify(tsinfer.infer(sample_data, simplify=False))
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def test_different_ancestors_ts_match_samples(self):
        """Matching samples against ancestors from other data raises ValueError.

        The ancestors tree sequence is built from one simulation (seed 1) and
        the sample data passed to ``match_samples`` from another (seed 2).
        """
        first_sim = msprime.simulate(sample_size=6, random_seed=1, mutation_rate=6)
        first_samples = tsinfer.SampleData.from_tree_sequence(first_sim)
        first_ancestors = tsinfer.generate_ancestors(first_samples)
        ancestors_ts = tsinfer.match_ancestors(first_samples, first_ancestors)

        second_sim = msprime.simulate(sample_size=6, random_seed=2, mutation_rate=6)
        second_samples = tsinfer.SampleData.from_tree_sequence(second_sim)
        with self.assertRaises(ValueError):
            tsinfer.match_samples(second_samples, ancestors_ts)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_append_sites(self):
        """Appending consecutive site chunks reproduces the full SampleData.

        The example tree sequence is cut into three intervals, the second and
        third are appended to a write-mode copy of the first, and the
        finalised result is compared with sample data built from the whole
        tree sequence.
        """
        ts = get_example_individuals_ts_with_metadata(4, 2, 10)
        head, middle, tail = (
            tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([interval]))
            for interval in ([0, 2], [2, 5], [5, 10])
        )
        combined = head.copy()  # put into write mode
        combined.append_sites(middle, tail)
        combined.finalise()
        combined.assert_data_equal(tsinfer.SampleData.from_tree_sequence(ts))

        # Test that the full file passes through unchanged if no args given
        untouched = combined.copy()
        untouched.append_sites()
        untouched.finalise()
        untouched.assert_data_equal(tsinfer.SampleData.from_tree_sequence(ts))
github tskit-dev / tsinfer / visualisation.py View on Github external
def visualise_ancestors():
    """Draw the ancestors generated from a small simulated dataset.

    The simulation is passed through ``tsinfer.strip_singletons`` before
    ancestors are generated; output goes to the "ancestors_{}.svg" pattern
    handed to ``AncestorBuilderViz.draw``.
    """
    simulated = msprime.simulate(
        10, mutation_rate=2, recombination_rate=2, random_seed=3
    )
    without_singletons = tsinfer.strip_singletons(simulated)
    samples = tsinfer.SampleData.from_tree_sequence(without_singletons)
    ancestors = tsinfer.generate_ancestors(samples)

    visualiser = AncestorBuilderViz(samples, ancestors)
    visualiser.draw(6, "ancestors_{}.svg")