Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Round-trips an example tree sequence (built by
# get_example_individuals_ts_with_metadata) through SampleData, then checks
# that several add_site / add_individual calls raise ValueError.
# NOTE(review): this text has lost its indentation, so the nesting of the
# statements below relative to the `with` block cannot be determined here.
# The statements from the first assertRaises onwards also look like they may
# be the tail of a different test whose `def` line is missing -- confirm
# against the upstream file before relying on this grouping.
def test_with_metadata_and_individuals(self):
ts = get_example_individuals_ts_with_metadata(5, 2, 10, 1)
with formats.SampleData(sequence_length=ts.sequence_length) as sample_data:
self.verify_data_round_trip(ts, sample_data)
# add_site with an empty genotypes list is expected to raise ValueError.
self.assertRaises(
ValueError,
sample_data.add_site,
position=0,
alleles=["0", "1"],
genotypes=[],
)
# Fresh SampleData with no individuals added: a site carrying a single
# genotype is expected to raise ValueError.
sample_data = formats.SampleData(sequence_length=10)
self.assertRaises(
ValueError,
sample_data.add_site,
position=0,
alleles=["0", "1"],
genotypes=[0],
)
# One individual added with ploidy=3, but only one genotype supplied:
# expected to raise ValueError (presumably a sample-count mismatch -- verify).
sample_data = formats.SampleData(sequence_length=10)
sample_data.add_individual(ploidy=3)
self.assertRaises(
ValueError,
sample_data.add_site,
position=0,
alleles=["0", "1"],
genotypes=[0],
)
# ploidy=3 with a samples_metadata list of length 1 is expected to raise
# ValueError.
sample_data = formats.SampleData(sequence_length=10)
self.assertRaises(
ValueError, sample_data.add_individual, ploidy=3, samples_metadata=[None]
)
def test_no_sites(self):
    """Finalising data that has an individual but no sites raises ValueError."""
    data = formats.SampleData(sequence_length=10)
    data.add_individual()
    with self.assertRaises(ValueError):
        data.finalise()
def test_population_metadata(self):
    """Check stored population metadata and each individual's population id."""
    data = formats.SampleData(sequence_length=10)
    data.add_population({"a": 1})
    data.add_population({"b": 2})
    # One individual per population, added in population order.
    for population_id in (0, 1):
        data.add_individual(population=population_id)
    data.add_site(position=0, genotypes=[0, 1])
    data.finalise()

    expected_metadata = [{"a": 1}, {"b": 2}]
    for population_id, metadata in enumerate(expected_metadata):
        self.assertEqual(data.populations_metadata[population_id], metadata)
    # Individual i was assigned to population i above.
    for individual_id in (0, 1):
        self.assertEqual(data.individuals_population[individual_id], individual_id)
def test_add_individual_errors(self):
    """Check that add_individual rejects a range of invalid arguments."""
    empty = formats.SampleData(sequence_length=10)
    with self.assertRaises(TypeError):
        empty.add_individual(metadata=234)
    # No populations have been added to this instance.
    with self.assertRaises(ValueError):
        empty.add_individual(population=0)

    with_population = formats.SampleData(sequence_length=10)
    with_population.add_population()
    # Each keyword set below is expected to be rejected with ValueError.
    invalid_arguments = [
        {"population": 1},
        {"location": "x234"},
        {"ploidy": 0},
        {"time": None},
        {"time": [1, 2]},
    ]
    for kwargs in invalid_arguments:
        with self.assertRaises(ValueError):
            with_population.add_individual(**kwargs)
def test_sequence_length(self):
    """Check explicit and default values of SampleData.sequence_length."""
    explicit = formats.SampleData(sequence_length=2)
    explicit.add_site(position=0, genotypes=[0, 1, 1, 0])
    explicit.finalise()
    self.assertEqual(explicit.sequence_length, 2)

    # The default sequence length should be the last site + 1.
    defaulted = formats.SampleData()
    defaulted.add_site(position=0, genotypes=[0, 1, 1, 0])
    defaulted.finalise()
    self.assertEqual(defaulted.sequence_length, 1)
def test_add_site_return(self):
    """Check that add_site returns 0 for the first site and 1 for the second."""
    data = formats.SampleData(sequence_length=10)
    # The position passed for each site equals the id expected back.
    for expected_id in (0, 1):
        returned_id = data.add_site(expected_id, [0, 1])
        self.assertEqual(returned_id, expected_id)
def test_chunk_size_file_equal(self):
    """Write the same data with two chunk sizes and check the files are equal."""
    ts = get_example_ts(13, 15)
    with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as workdir:
        paths = []
        for size in [5, 7]:
            path = os.path.join(workdir, "samples_{}.tmp".format(size))
            paths.append(path)
            writer = formats.SampleData(
                sequence_length=ts.sequence_length,
                path=path,
                chunk_size=size,
            )
            with writer as input_file:
                self.verify_data_round_trip(ts, input_file)
                expected_chunks = (size, size)
                self.assertEqual(input_file.sites_genotypes.chunks, expected_chunks)
        # Now reload the files and check they are equal
        first_path, second_path = paths[0], paths[1]
        with formats.SampleData.load(first_path) as first:
            with formats.SampleData.load(second_path) as second:
                # Can't use eq here because UUIDs will not be equal.
                self.assertTrue(first.data_equal(second))
def test_multichar_alleles(self):
    """Round-trip a tree sequence whose allele strings have varying lengths."""
    ts = get_example_ts(5, 17)
    tables = ts.dump_tables()
    tables.sites.clear()
    tables.mutations.clear()
    for site in ts.sites():
        # Allele strings are built from the site id: "A" repeated id + 1
        # times for the ancestral state, "T" repeated id times for mutations.
        ancestral = "A" * (site.id + 1)
        derived = "T" * site.id
        tables.sites.add_row(site.position, ancestral_state=ancestral)
        for mutation in site.mutations:
            tables.mutations.add_row(
                site=site.id, node=mutation.node, derived_state=derived
            )
    ts = tables.tree_sequence()
    input_file = formats.SampleData(sequence_length=ts.sequence_length)
    self.verify_data_round_trip(ts, input_file)
("num_samples", self.num_samples),
("num_sites", self.num_sites),
("populations/metadata", zarr_summary(self.populations_metadata)),
("individuals/metadata", zarr_summary(self.individuals_metadata)),
("individuals/location", zarr_summary(self.individuals_location)),
("individuals/time", zarr_summary(self.individuals_time)),
("individuals/population", zarr_summary(self.individuals_population)),
("samples/individual", zarr_summary(self.samples_individual)),
("samples/metadata", zarr_summary(self.samples_metadata)),
("sites/position", zarr_summary(self.sites_position)),
("sites/time", zarr_summary(self.sites_time)),
("sites/alleles", zarr_summary(self.sites_alleles)),
("sites/genotypes", zarr_summary(self.sites_genotypes)),
("sites/metadata", zarr_summary(self.sites_metadata)),
]
return super(SampleData, self).__str__() + self._format_str(values)