How to use the tsinfer.formats.SampleData function in tsinfer

To help you get started, we’ve selected a few tsinfer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_with_metadata_and_individuals(self):
        # Round-trip an example tree sequence built by
        # get_example_individuals_ts_with_metadata through a SampleData
        # instance that is used as a context manager.
        example_ts = get_example_individuals_ts_with_metadata(5, 2, 10, 1)
        length = example_ts.sequence_length
        with formats.SampleData(sequence_length=length) as data:
            self.verify_data_round_trip(example_ts, data)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
self.assertRaises(
            ValueError,
            sample_data.add_site,
            position=0,
            alleles=["0", "1"],
            genotypes=[],
        )
        sample_data = formats.SampleData(sequence_length=10)
        self.assertRaises(
            ValueError,
            sample_data.add_site,
            position=0,
            alleles=["0", "1"],
            genotypes=[0],
        )
        sample_data = formats.SampleData(sequence_length=10)
        sample_data.add_individual(ploidy=3)
        self.assertRaises(
            ValueError,
            sample_data.add_site,
            position=0,
            alleles=["0", "1"],
            genotypes=[0],
        )
        sample_data = formats.SampleData(sequence_length=10)
        self.assertRaises(
            ValueError, sample_data.add_individual, ploidy=3, samples_metadata=[None]
        )
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_no_sites(self):
        # Finalising data that holds an individual but no sites is expected
        # to raise ValueError.
        data = formats.SampleData(sequence_length=10)
        data.add_individual()
        with self.assertRaises(ValueError):
            data.finalise()
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_population_metadata(self):
        # Two populations with distinct metadata, one individual assigned to
        # each, then a single site so that the data can be finalised.
        data = formats.SampleData(sequence_length=10)
        for metadata in ({"a": 1}, {"b": 2}):
            data.add_population(metadata)
        for population_id in range(2):
            data.add_individual(population=population_id)
        data.add_site(position=0, genotypes=[0, 1])
        data.finalise()

        # Metadata is checked first, then the individual -> population
        # mapping, both in population order.
        expected_metadata = [{"a": 1}, {"b": 2}]
        for population_id, metadata in enumerate(expected_metadata):
            self.assertEqual(data.populations_metadata[population_id], metadata)
        for individual_id in range(2):
            self.assertEqual(
                data.individuals_population[individual_id], individual_id
            )
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_add_individual_errors(self):
        # With no populations defined: non-dict metadata is a TypeError and
        # referring to population 0 is a ValueError.
        no_populations = formats.SampleData(sequence_length=10)
        with self.assertRaises(TypeError):
            no_populations.add_individual(metadata=234)
        with self.assertRaises(ValueError):
            no_populations.add_individual(population=0)

        # With exactly one population defined, each of these argument sets
        # must be rejected with ValueError (checked in this order).
        one_population = formats.SampleData(sequence_length=10)
        one_population.add_population()
        rejected_arguments = [
            {"population": 1},
            {"location": "x234"},
            {"ploidy": 0},
            {"time": None},
            {"time": [1, 2]},
        ]
        for arguments in rejected_arguments:
            with self.assertRaises(ValueError):
                one_population.add_individual(**arguments)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_sequence_length(self):
        # Each scenario is (constructor keyword arguments, expected length).
        # The first passes sequence_length explicitly; the second relies on
        # the default, which should be the last site position + 1.
        scenarios = (
            ({"sequence_length": 2}, 2),
            ({}, 1),
        )
        for constructor_kwargs, expected_length in scenarios:
            data = formats.SampleData(**constructor_kwargs)
            data.add_site(position=0, genotypes=[0, 1, 1, 0])
            data.finalise()
            self.assertEqual(data.sequence_length, expected_length)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_add_site_return(self):
        # add_site should return the ID of the site it just added: 0 for the
        # first call, 1 for the second.
        data = formats.SampleData(sequence_length=10)
        for expected_id, position in enumerate((0, 1)):
            returned_id = data.add_site(position, [0, 1])
            self.assertEqual(returned_id, expected_id)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_chunk_size_file_equal(self):
        # Write the same example data to two files using different chunk
        # sizes, then reload both files and check their contents agree.
        example_ts = get_example_ts(13, 15)
        chunk_sizes = (5, 7)
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tmp_root:
            paths = [
                os.path.join(tmp_root, "samples_{}.tmp".format(size))
                for size in chunk_sizes
            ]
            for size, path in zip(chunk_sizes, paths):
                writer = formats.SampleData(
                    sequence_length=example_ts.sequence_length,
                    path=path,
                    chunk_size=size,
                )
                with writer as stored:
                    self.verify_data_round_trip(example_ts, stored)
                    # The genotypes array must be chunked as requested.
                    self.assertEqual(stored.sites_genotypes.chunks, (size, size))

            # Reload both files and compare them.
            first_path, second_path = paths
            with formats.SampleData.load(
                first_path
            ) as first, formats.SampleData.load(second_path) as second:
                # Can't use eq here because UUIDs will not be equal.
                self.assertTrue(first.data_equal(second))
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_multichar_alleles(self):
        # Rebuild the example tree sequence's sites and mutations so that the
        # allele strings vary in length with the site ID, then round-trip the
        # result through SampleData.
        base_ts = get_example_ts(5, 17)
        tables = base_ts.dump_tables()
        tables.sites.clear()
        tables.mutations.clear()
        for site in base_ts.sites():
            site_id = site.id
            # Ancestral state is site_id + 1 copies of "A"; derived state is
            # site_id copies of "T" (so it is "" when site_id is 0).
            ancestral = "A" * (site_id + 1)
            derived = "T" * site_id
            tables.sites.add_row(site.position, ancestral_state=ancestral)
            for mut in site.mutations:
                tables.mutations.add_row(
                    site=site_id, node=mut.node, derived_state=derived
                )
        multichar_ts = tables.tree_sequence()
        data = formats.SampleData(sequence_length=multichar_ts.sequence_length)
        self.verify_data_round_trip(multichar_ts, data)
github tskit-dev / tsinfer / tsinfer / formats.py View on Github external
("num_samples", self.num_samples),
            ("num_sites", self.num_sites),
            ("populations/metadata", zarr_summary(self.populations_metadata)),
            ("individuals/metadata", zarr_summary(self.individuals_metadata)),
            ("individuals/location", zarr_summary(self.individuals_location)),
            ("individuals/time", zarr_summary(self.individuals_time)),
            ("individuals/population", zarr_summary(self.individuals_population)),
            ("samples/individual", zarr_summary(self.samples_individual)),
            ("samples/metadata", zarr_summary(self.samples_metadata)),
            ("sites/position", zarr_summary(self.sites_position)),
            ("sites/time", zarr_summary(self.sites_time)),
            ("sites/alleles", zarr_summary(self.sites_alleles)),
            ("sites/genotypes", zarr_summary(self.sites_genotypes)),
            ("sites/metadata", zarr_summary(self.sites_metadata)),
        ]
        return super(SampleData, self).__str__() + self._format_str(values)