How to use the tsinfer.match_samples function in tsinfer

To help you get started, we’ve selected a few tsinfer.match_samples examples, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def verify_example(self, subset, samples, ancestors, path_compression):
        ancestors_ts = tsinfer.match_ancestors(
            samples, ancestors, path_compression=path_compression
        )
        augmented_ancestors = tsinfer.augment_ancestors(
            samples, ancestors_ts, subset, path_compression=path_compression
        )
        self.verify_augmented_ancestors(
            subset, ancestors_ts, augmented_ancestors, path_compression
        )

        # Run the inference now
        final_ts = tsinfer.match_samples(samples, augmented_ancestors, simplify=False)
        t1 = ancestors_ts.dump_tables()
        tables = final_ts.tables
        for j, index in enumerate(subset):
            sample_id = final_ts.samples()[index]
            edges = [e for e in final_ts.edges() if e.child == sample_id]
            self.assertEqual(len(edges), 1)
            self.assertEqual(edges[0].left, 0)
            self.assertEqual(edges[0].right, final_ts.sequence_length)
            parent = edges[0].parent
            original_node = len(t1.nodes) + j
            self.assertEqual(
                tables.nodes.flags[original_node], tsinfer.NODE_IS_SAMPLE_ANCESTOR
            )
            # Most of the time the parent is the original node. However, in
            # simple cases it can be somewhere up the tree above it.
            if parent != original_node:
github tskit-dev / tsinfer / tests / test_evaluation.py View on Github external
def verify_from_inferred(self, remove_leaves):
        """Check the ancestors tree sequence built from an inferred tree sequence.

        Simulates a small tree sequence with msprime (fixed seed), converts it
        to sample data, runs ``tsinfer.infer`` on it and builds an ancestors
        tree sequence from the inferred result.  Samples are then matched
        against that ancestors tree sequence with each engine, and every
        result is passed to ``tsinfer.verify``.

        :param remove_leaves: Forwarded unchanged to
            ``tsinfer.make_ancestors_ts``.
        """
        ts = msprime.simulate(15, recombination_rate=1, mutation_rate=2, random_seed=3)
        samples = tsinfer.SampleData.from_tree_sequence(ts)
        inferred = tsinfer.infer(samples)
        ancestors_ts = tsinfer.make_ancestors_ts(
            samples, inferred, remove_leaves=remove_leaves
        )
        tsinfer.check_ancestors_ts(ancestors_ts)
        for engine in (tsinfer.PY_ENGINE, tsinfer.C_ENGINE):
            final_ts = tsinfer.match_samples(samples, ancestors_ts, engine=engine)
            # Verify inside the loop: previously only the result of the last
            # engine was checked and the first engine's output was discarded.
            tsinfer.verify(samples, final_ts)
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
dtype=np.int8,
        )
        start = ancestor_data.ancestors_start[:]
        end = ancestor_data.ancestors_end[:]
        ancestors = ancestor_data.ancestors_haplotype[:]
        for j in range(ancestor_data.num_ancestors):
            A[start[j] : end[j], j] = ancestors[j]
        for engine in [tsinfer.PY_ENGINE, tsinfer.C_ENGINE]:
            ancestors_ts = tsinfer.match_ancestors(
                sample_data, ancestor_data, engine=engine
            )
            tsinfer.check_ancestors_ts(ancestors_ts)
            self.assertEqual(ancestor_data.num_sites, ancestors_ts.num_sites)
            self.assertEqual(ancestor_data.num_ancestors, ancestors_ts.num_samples)
            self.assertTrue(np.array_equal(ancestors_ts.genotype_matrix(), A))
            inferred_ts = tsinfer.match_samples(
                sample_data, ancestors_ts, engine=engine
            )
            self.assertTrue(
                np.array_equal(inferred_ts.genotype_matrix(), ts.genotype_matrix())
            )
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
for v in ts.variants():
            sample_data.add_site(v.site.position, v.genotypes, v.alleles)
        sample_data.finalise()

        ancestor_data = tsinfer.AncestorData(sample_data)
        tsinfer.build_simulated_ancestors(sample_data, ancestor_data, ts)
        ancestor_data.finalise()
        ancestors_ts = tsinfer.match_ancestors(
            sample_data,
            ancestor_data,
            engine=engine,
            path_compression=path_compression,
            precision=precision,
            extended_checks=True,
        )
        inferred_ts = tsinfer.match_samples(
            sample_data,
            ancestors_ts,
            engine=engine,
            simplify=True,
            path_compression=path_compression,
            precision=precision,
            extended_checks=True,
        )
        return inferred_ts
github mcveanlab / treeseq-inference / src / run_old_tsinfer.py View on Github external
ancestors_ts = tsinfer.match_ancestors(
            sample_data, ancestor_data, method=args.method, 
            path_compression=args.shared_recombinations)
        ts = tsinfer.match_samples(
            sample_data, ancestors_ts, method=args.method, 
            path_compression=args.shared_recombinations,
            simplify=True)
    else:
        ancestor_data = formats.AncestorData.initialise(sample_data, compressor=None)
        tsinfer.build_ancestors(sample_data, ancestor_data, method=args.method)
        ancestor_data.finalise()
        ancestors_ts = tsinfer.match_ancestors(
            sample_data, ancestor_data, method=args.method, 
            num_threads=args.threads,
            path_compression=args.shared_recombinations)
        ts = tsinfer.match_samples(
            sample_data, ancestors_ts, method=args.method, 
            path_compression=args.shared_recombinations,
            simplify=True)
    ts.dump(args.output)
github mcveanlab / treeseq-inference / src / run_tsinfer.py View on Github external
logging.warning("TSinfer now simply ignores error probabilities. You can omit this parameter")
    engine = tsinfer.PY_ENGINE if args.method == "P" else tsinfer.C_ENGINE

    if not os.path.isfile(args.samples):
        raise ValueError("No samples file")
    sample_data = tsinfer.load(args.samples)
    if all(False for _ in sample_data.genotypes(inference_sites=True)):
        raise ValueError("No inference sites")
    if args.inject_real_ancestors_from_ts is not None:
        ancestor_data = tsinfer.AncestorData.initialise(sample_data, compressor=None)
        orig_ts = msprime.load(args.inject_real_ancestors_from_ts)
        eval_util.build_simulated_ancestors(sample_data, ancestor_data, orig_ts)
        ancestor_data.finalise()
        ancestors_ts = tsinfer.match_ancestors(
            sample_data, ancestor_data, path_compression=args.shared_recombinations, engine=engine)
        ts = tsinfer.match_samples(
            sample_data, ancestors_ts, path_compression=args.shared_recombinations, engine=engine, simplify=True)
    else:
        ts = tsinfer.infer(
            sample_data, num_threads=args.threads, path_compression=args.shared_recombinations, engine=engine)
    ts.dump(args.output)
github tskit-dev / tsinfer / visualisation.py View on Github external
sample_data,
        ancestors_ts,
        engine=engine,
        simplify=False,
        path_compression=path_compression,
        extended_checks=True,
    )

    prefix = "tmp__NOBACKUP__/"
    visualiser = Visualiser(
        ts, sample_data, ancestor_data, inferred_ts, box_size=box_size
    )
    visualiser.draw_copying_paths(os.path.join(prefix, "copying_{}.png"))

    # tsinfer.print_tree_pairs(ts, inferred_ts, compute_distances=False)
    inferred_ts = tsinfer.match_samples(
        sample_data,
        ancestors_ts,
        engine=engine,
        simplify=True,
        path_compression=False,
        stabilise_node_ordering=True,
    )

    tsinfer.print_tree_pairs(ts, inferred_ts, compute_distances=True)
    sys.stdout.flush()
    print(
        "num_sites = ",
        inferred_ts.num_sites,
        "num_mutations= ",
        inferred_ts.num_mutations,
    )