Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def verify_example(self, subset, samples, ancestors, path_compression):
# Checks the result of augmenting the ancestors tree sequence with the
# samples listed in ``subset`` and then matching all samples against it.
# NOTE(review): this copy has lost its indentation and the definition is
# cut off after the final ``if``; the nesting and the body of that branch
# cannot be recovered from here.
ancestors_ts = tsinfer.match_ancestors(
samples, ancestors, path_compression=path_compression
)
augmented_ancestors = tsinfer.augment_ancestors(
samples, ancestors_ts, subset, path_compression=path_compression
)
self.verify_augmented_ancestors(
subset, ancestors_ts, augmented_ancestors, path_compression
)
# Run the inference now
# simplify=False is passed here; presumably this keeps node IDs aligned
# with the augmented ancestors so they can be indexed below -- confirm.
final_ts = tsinfer.match_samples(samples, augmented_ancestors, simplify=False)
# t1 is taken from the ancestors tree sequence *before* augmentation; its
# node count is used as the base offset for ``original_node`` below.
t1 = ancestors_ts.dump_tables()
tables = final_ts.tables
for j, index in enumerate(subset):
sample_id = final_ts.samples()[index]
# The sample must be the child of exactly one edge, and that edge must
# span the interval [0, sequence_length).
edges = [e for e in final_ts.edges() if e.child == sample_id]
self.assertEqual(len(edges), 1)
self.assertEqual(edges[0].left, 0)
self.assertEqual(edges[0].right, final_ts.sequence_length)
parent = edges[0].parent
# The j-th entry of ``subset`` is expected at node index
# len(t1.nodes) + j, and that node must carry the sample-ancestor flag.
original_node = len(t1.nodes) + j
self.assertEqual(
tables.nodes.flags[original_node], tsinfer.NODE_IS_SAMPLE_ANCESTOR
)
# Most of the time the parent is the original node. However, in
# simple cases it can be somewhere up the tree above it.
if parent != original_node:
def verify_from_inferred(self, remove_leaves):
    """Check that an inferred tree sequence can be reused as ancestors.

    Simulates a small dataset, infers a tree sequence from it, converts
    that result with ``tsinfer.make_ancestors_ts`` and then matches the
    samples against the converted tree sequence with each engine.

    :param remove_leaves: Forwarded unchanged as the ``remove_leaves``
        argument of ``tsinfer.make_ancestors_ts``.
    """
    # A fixed random_seed keeps the simulated input the same on every run.
    ts = msprime.simulate(15, recombination_rate=1, mutation_rate=2, random_seed=3)
    samples = tsinfer.SampleData.from_tree_sequence(ts)
    inferred = tsinfer.infer(samples)
    ancestors_ts = tsinfer.make_ancestors_ts(
        samples, inferred, remove_leaves=remove_leaves
    )
    tsinfer.check_ancestors_ts(ancestors_ts)
    for engine in (tsinfer.PY_ENGINE, tsinfer.C_ENGINE):
        final_ts = tsinfer.match_samples(samples, ancestors_ts, engine=engine)
        # Verify inside the loop so the output of every engine is checked,
        # not only the output of whichever engine runs last.
        tsinfer.verify(samples, final_ts)
dtype=np.int8,
)
# NOTE(review): fragment of a larger function; indentation was lost in this
# copy, so the loop nesting below is not visible. ``A`` is created on lines
# that precede this fragment.
start = ancestor_data.ancestors_start[:]
end = ancestor_data.ancestors_end[:]
ancestors = ancestor_data.ancestors_haplotype[:]
# Copy ancestor j's haplotype into column j of A, over rows start[j]..end[j].
for j in range(ancestor_data.num_ancestors):
A[start[j] : end[j], j] = ancestors[j]
# Repeat the same round-trip checks with the Python and the C engine.
for engine in [tsinfer.PY_ENGINE, tsinfer.C_ENGINE]:
ancestors_ts = tsinfer.match_ancestors(
sample_data, ancestor_data, engine=engine
)
tsinfer.check_ancestors_ts(ancestors_ts)
# The ancestors tree sequence must have one site per ancestor_data site,
# one sample per ancestor, and a genotype matrix equal to A.
self.assertEqual(ancestor_data.num_sites, ancestors_ts.num_sites)
self.assertEqual(ancestor_data.num_ancestors, ancestors_ts.num_samples)
self.assertTrue(np.array_equal(ancestors_ts.genotype_matrix(), A))
inferred_ts = tsinfer.match_samples(
sample_data, ancestors_ts, engine=engine
)
# After matching samples, the genotypes must equal those of ``ts``.
self.assertTrue(
np.array_equal(inferred_ts.genotype_matrix(), ts.genotype_matrix())
)
# NOTE(review): fragment of a larger function whose header is not visible;
# ``ts``, ``sample_data``, ``engine``, ``path_compression`` and ``precision``
# are defined on lines outside this fragment.
# Add one site to sample_data for every variant in ``ts``.
for v in ts.variants():
sample_data.add_site(v.site.position, v.genotypes, v.alleles)
sample_data.finalise()
# Fill ancestor_data through build_simulated_ancestors, passing ``ts``.
ancestor_data = tsinfer.AncestorData(sample_data)
tsinfer.build_simulated_ancestors(sample_data, ancestor_data, ts)
ancestor_data.finalise()
# Both matching steps share the same engine, path_compression and precision
# settings and run with extended_checks enabled.
ancestors_ts = tsinfer.match_ancestors(
sample_data,
ancestor_data,
engine=engine,
path_compression=path_compression,
precision=precision,
extended_checks=True,
)
inferred_ts = tsinfer.match_samples(
sample_data,
ancestors_ts,
engine=engine,
simplify=True,
path_compression=path_compression,
precision=precision,
extended_checks=True,
)
return inferred_ts
# NOTE(review): fragment of a larger function; the ``if`` that pairs with the
# ``else:`` below is outside this view and the indentation was lost.
# First branch: ``ancestor_data`` comes from lines that precede this fragment.
ancestors_ts = tsinfer.match_ancestors(
sample_data, ancestor_data, method=args.method,
path_compression=args.shared_recombinations)
ts = tsinfer.match_samples(
sample_data, ancestors_ts, method=args.method,
path_compression=args.shared_recombinations,
simplify=True)
else:
# Second branch: build the ancestors here from sample_data, with no compressor.
ancestor_data = formats.AncestorData.initialise(sample_data, compressor=None)
tsinfer.build_ancestors(sample_data, ancestor_data, method=args.method)
ancestor_data.finalise()
# NOTE(review): only this branch passes num_threads to match_ancestors, and
# neither branch passes it to match_samples -- confirm this is intended.
ancestors_ts = tsinfer.match_ancestors(
sample_data, ancestor_data, method=args.method,
num_threads=args.threads,
path_compression=args.shared_recombinations)
ts = tsinfer.match_samples(
sample_data, ancestors_ts, method=args.method,
path_compression=args.shared_recombinations,
simplify=True)
# Write the resulting tree sequence to the path given by args.output.
ts.dump(args.output)
# NOTE(review): fragment of a larger function; indentation was lost, so it is
# not visible here whether the warning below is guarded by a condition.
logging.warning("TSinfer now simply ignores error probabilities. You can omit this parameter")
# A method of "P" selects the Python engine; any other value selects the C engine.
engine = tsinfer.PY_ENGINE if args.method == "P" else tsinfer.C_ENGINE
if not os.path.isfile(args.samples):
raise ValueError("No samples file")
sample_data = tsinfer.load(args.samples)
# all(False for _ in it) is True only when the iterator yields nothing, so
# this raises when there are no genotypes for inference sites.
if all(False for _ in sample_data.genotypes(inference_sites=True)):
raise ValueError("No inference sites")
if args.inject_real_ancestors_from_ts is not None:
# Build ancestor_data from the tree sequence loaded from the given path,
# then run the two matching steps explicitly.
ancestor_data = tsinfer.AncestorData.initialise(sample_data, compressor=None)
orig_ts = msprime.load(args.inject_real_ancestors_from_ts)
eval_util.build_simulated_ancestors(sample_data, ancestor_data, orig_ts)
ancestor_data.finalise()
ancestors_ts = tsinfer.match_ancestors(
sample_data, ancestor_data, path_compression=args.shared_recombinations, engine=engine)
ts = tsinfer.match_samples(
sample_data, ancestors_ts, path_compression=args.shared_recombinations, engine=engine, simplify=True)
else:
# No injected ancestors: hand the whole pipeline to tsinfer.infer.
# NOTE(review): num_threads is only passed on this path -- confirm intended.
ts = tsinfer.infer(
sample_data, num_threads=args.threads, path_compression=args.shared_recombinations, engine=engine)
# Write the resulting tree sequence to the path given by args.output.
ts.dump(args.output)
sample_data,
ancestors_ts,
engine=engine,
simplify=False,
path_compression=path_compression,
extended_checks=True,
)
# NOTE(review): fragment of a larger function; ``ts``, ``sample_data``,
# ``ancestor_data``, ``ancestors_ts``, ``inferred_ts``, ``engine`` and
# ``box_size`` are defined on lines outside this fragment.
prefix = "tmp__NOBACKUP__/"
visualiser = Visualiser(
ts, sample_data, ancestor_data, inferred_ts, box_size=box_size
)
# The "{}" placeholder in the file name is presumably filled in by
# draw_copying_paths -- confirm against Visualiser.
visualiser.draw_copying_paths(os.path.join(prefix, "copying_{}.png"))
# tsinfer.print_tree_pairs(ts, inferred_ts, compute_distances=False)
# Re-run sample matching, this time simplified, without path compression and
# with stabilise_node_ordering enabled; this rebinds inferred_ts.
inferred_ts = tsinfer.match_samples(
sample_data,
ancestors_ts,
engine=engine,
simplify=True,
path_compression=False,
stabilise_node_ordering=True,
)
tsinfer.print_tree_pairs(ts, inferred_ts, compute_distances=True)
# Flush stdout before the summary line is printed.
sys.stdout.flush()
print(
"num_sites = ",
inferred_ts.num_sites,
"num_mutations= ",
inferred_ts.num_mutations,
)