How to use tsinfer - 10 common examples

To help you get started, we’ve selected a few tsinfer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_provenance(self):
        """Check the provenance entries recorded on an AncestorData file.

        The ancestor data built from the example samples must hold exactly
        one more provenance entry than the sample data. That extra (last)
        entry must be dated today and name "tsinfer" as the software, and
        every entry of the sample data must appear, in order, at the start
        of the ancestor data's provenance list.
        """
        samples, expected_ancestors = self.get_example_data(10, 10, 40)
        anc_data = tsinfer.AncestorData(samples)
        self.verify_data_round_trip(samples, anc_data, expected_ancestors)
        self.assertEqual(anc_data.num_provenances, samples.num_provenances + 1)

        # Only the date part (text before the "T") is compared, so the check
        # does not depend on the exact time at which the entry was written.
        last_timestamp = anc_data.provenances_timestamp[-1]
        today = datetime.datetime.now().isoformat().split("T")[0]
        self.assertEqual(last_timestamp.split("T")[0], today)

        last_record = anc_data.provenances_record[-1]
        self.assertEqual(last_record["software"]["name"], "tsinfer")

        # The iterator view must agree with the column-style accessors.
        all_entries = list(anc_data.provenances())
        self.assertEqual(all_entries[-1][0], last_timestamp)
        self.assertEqual(all_entries[-1][1], last_record)

        # Entries inherited from the sample data come first and are unchanged.
        inherited = samples.provenances()
        for index, (sample_timestamp, sample_record) in enumerate(inherited):
            self.assertEqual(sample_timestamp, all_entries[index][0])
            self.assertEqual(sample_record, all_entries[index][1])
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def verify_round_trip(self, genotypes, exclude_sites):
        self.assertEqual(genotypes.shape[0], exclude_sites.shape[0])
        with tsinfer.SampleData() as sample_data:
            for j in range(genotypes.shape[0]):
                sample_data.add_site(j, genotypes[j])
        exclude_positions = sample_data.sites_position[:][exclude_sites]
        for simplify in [False, True]:
            output_ts = tsinfer.infer(
                sample_data, simplify=simplify, exclude_positions=exclude_positions
            )
            for tree in output_ts.trees():
                for site in tree.sites():
                    inf_type = json.loads(site.metadata)["inference_type"]
                    if exclude_sites[site.id]:
                        self.assertEqual(inf_type, tsinfer.INFERENCE_FITCH_PARSIMONY)
                    else:
                        self.assertEqual(inf_type, tsinfer.INFERENCE_FULL)
                    f = np.sum(genotypes[site.id])
                    if f == 0:
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_zero_sequence_length(self):
        """Check that ancestor generation rejects a zero sequence length.

        A sample data file is written to a temporary directory from a small
        simulation, then its stored ``sequence_length`` attribute is
        overwritten with 0 directly through zarr. The reloaded file must
        report a sequence length of 0, and ``tsinfer.generate_ancestors``
        must raise ``ValueError`` when given it.
        """
        ts = msprime.simulate(10, mutation_rate=2, random_seed=5)
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            filename = os.path.join(tempdir, "samples.tmp")
            with tsinfer.SampleData(path=filename) as sample_data:
                for var in ts.variants():
                    sample_data.add_site(var.site.position, var.genotypes)

            # Mangle the finished file to force a zero sequence length.
            store = zarr.LMDBStore(filename, subdir=False)
            try:
                # NOTE(review): "w+" is kept exactly as in the original test;
                # confirm it is a mode zarr officially supports.
                data = zarr.open(store=store, mode="w+")
                data.attrs["sequence_length"] = 0
            finally:
                # Always release the LMDB handle, even if the edit above
                # fails, so the temporary directory can be removed and the
                # original error is the one that gets reported.
                store.close()

            sample_data = tsinfer.load(filename)
            self.assertEqual(sample_data.sequence_length, 0)
            self.assertRaises(ValueError, tsinfer.generate_ancestors, sample_data)
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def test_match_ancestors_samples(self):
        """Check that match_ancestors refuses sample data that is not finalised.

        Ancestors are generated from a finalised one-site sample data
        object; passing an equivalent but still-open sample data object to
        ``tsinfer.match_ancestors`` must raise ``ValueError``.
        """
        with tsinfer.SampleData(sequence_length=2) as finalised:
            finalised.add_site(1, genotypes=[0, 1, 1, 0], alleles=["G", "C"])
        ancestors = tsinfer.generate_ancestors(finalised)

        # Same content, but built outside a ``with`` block so it is never
        # finalised.
        still_open = tsinfer.SampleData(sequence_length=2)
        still_open.add_site(1, genotypes=[0, 1, 1, 0], alleles=["G", "C"])
        with self.assertRaises(ValueError):
            tsinfer.match_ancestors(still_open, ancestors)
github tskit-dev / tsinfer / tests / test_provenance.py View on Github external
def test_infer(self):
        """Run full inference on a small simulation and validate the result.

        The simulated tree sequence must contain more than one site before
        it is converted to sample data and passed to ``tsinfer.infer``; the
        inferred tree sequence is then checked with ``self.validate_ts``.
        """
        simulated = msprime.simulate(10, mutation_rate=1, random_seed=1)
        self.assertGreater(simulated.num_sites, 1)
        sample_data = tsinfer.SampleData.from_tree_sequence(simulated)
        self.validate_ts(tsinfer.infer(sample_data))
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def test_large_random_data(self):
        """Verify inference on a random example of 100 by 30.

        The pair (100, 30) is passed to ``get_random_data_example``; the
        second value is also used as the sequence length of the sample data.
        """
        num_samples, num_sites = 100, 30
        genotype_rows, site_positions = get_random_data_example(
            num_samples, num_sites
        )
        with tsinfer.SampleData(sequence_length=num_sites) as sample_data:
            # Each genotype row is paired with the position at the same index.
            for row, pos in zip(genotype_rows, site_positions):
                sample_data.add_site(pos, row)
        self.verify(sample_data)
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def test_one_sites(self):
        """Verify subsetting when only the first default inference site is kept."""
        simulated = msprime.simulate(
            15, mutation_rate=2, recombination_rate=2, random_seed=3
        )
        samples = tsinfer.SampleData.from_tree_sequence(simulated)
        inference_positions = get_default_inference_sites(samples)
        # A one-element slice (not a scalar) is passed as the position subset.
        first_only = inference_positions[:1]
        self.verify(samples, first_only)
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def verify(self, sample_data, position_subset):
        """Check that matching subsetted samples reproduces their genotypes.

        The full inference result is restricted to ``position_subset`` to
        obtain the reference tree sequence. The ancestors tree sequence is
        restricted to the same positions, minimised and simplified, and the
        sample data rebuilt from the reference is matched against it. The
        resulting genotype matrix must equal the reference genotype matrix.

        :param sample_data: Sample data to infer from.
        :param position_subset: Site positions handed to ``self.subset_sites``.
        """
        inferred = tsinfer.infer(sample_data)
        reference_ts = self.subset_sites(inferred, position_subset)

        ancestors = tsinfer.generate_ancestors(sample_data)
        full_ancestors_ts = tsinfer.match_ancestors(sample_data, ancestors)
        reduced_ancestors_ts = tsinfer.minimise(
            self.subset_sites(full_ancestors_ts, position_subset)
        ).simplify()

        reduced_samples = tsinfer.SampleData.from_tree_sequence(reference_ts)
        matched_ts = tsinfer.match_samples(reduced_samples, reduced_ancestors_ts)

        matrices_agree = np.array_equal(
            matched_ts.genotype_matrix(), reference_ts.genotype_matrix()
        )
        self.assertTrue(matrices_agree)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_append_sites_incompatible_files(self):
        """Check the error cases of ``SampleData.append_sites``.

        An example tree sequence is cut into three consecutive intervals
        ([0, 2], [2, 5] and [5, 10]), each converted to its own sample data
        object. Appending must raise ``ValueError`` when:

        * the target is not in write mode (message matches "build");
        * one of the appended objects is in write mode ("finalise");
        * an appended object has a different sequence length ("length");
        * an appended object has a different number of samples ("samples").

        Uses ``assertRaisesRegex``; the old ``assertRaisesRegexp`` spelling
        is a deprecated alias that no longer exists in current Python.
        """
        ts = get_example_individuals_ts_with_metadata(4, 2, 10)
        sd1 = tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([[0, 2]]))
        mid_ts = ts.keep_intervals([[2, 5]])
        sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts)
        sd3 = tsinfer.SampleData.from_tree_sequence(ts.keep_intervals([[5, 10]]))
        # Fails if altered SD is not in write mode
        self.assertRaisesRegex(ValueError, "build", sd1.append_sites, sd2, sd3)
        # Fails if added SDs are in write mode
        sd = sd1.copy()  # put into write mode
        sd.append_sites(sd2, sd3)  # now works
        self.assertRaisesRegex(
            ValueError, "finalise", sd.append_sites, sd2.copy(), sd3
        )
        # Start again from a fresh writable copy for the remaining checks.
        sd = sd1.copy()  # put into write mode

        # Wrong seq length
        sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts.rtrim())
        self.assertRaisesRegex(ValueError, "length", sd.append_sites, sd2, sd3)
        # Wrong num samples
        sd2 = tsinfer.SampleData.from_tree_sequence(mid_ts.simplify(list(range(7))))
        self.assertRaisesRegex(ValueError, "samples", sd.append_sites, sd2, sd3)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_access_individuals(self):
        """Check that iterating individuals agrees with access by index.

        For every individual yielded by ``sd.individuals()`` the id must
        equal its position in the iteration, and ``sd.individual(id)`` must
        compare equal to it until the fetched copy's ``samples`` attribute
        is replaced, after which the two must differ. At least one
        individual must carry metadata, and the iteration must cover all
        ``sd.num_individuals`` individuals.
        """
        example_ts = get_example_individuals_ts_with_metadata(5, 2, 10, 1)
        sd = tsinfer.SampleData.from_tree_sequence(example_ts)
        self.assertGreater(sd.num_individuals, 0)

        # Guards against the comparisons below never involving any metadata.
        metadata_seen = False
        for index, from_iterator in enumerate(sd.individuals()):
            metadata_seen = metadata_seen or from_iterator.metadata is not None
            self.assertEqual(index, from_iterator.id)
            from_lookup = sd.individual(index)
            self.assertEqual(from_lookup, from_iterator)
            # Changing one attribute must be enough to break equality.
            from_lookup.samples = []
            self.assertNotEqual(from_lookup, from_iterator)
        self.assertTrue(metadata_seen)
        # The last index seen shows the loop visited every individual.
        self.assertEqual(index, sd.num_individuals - 1)