How to use the tsinfer.load function in tsinfer

To help you get started, we’ve selected a few tsinfer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_zero_sequence_length(self):
        # Mangle a sample data file to force a zero sequence length.
        ts = msprime.simulate(10, mutation_rate=2, random_seed=5)
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            filename = os.path.join(tempdir, "samples.tmp")
            with tsinfer.SampleData(path=filename) as sample_data:
                for var in ts.variants():
                    sample_data.add_site(var.site.position, var.genotypes)
            store = zarr.LMDBStore(filename, subdir=False)
            data = zarr.open(store=store, mode="w+")
            data.attrs["sequence_length"] = 0
            store.close()
            sample_data = tsinfer.load(filename)
            self.assertEqual(sample_data.sequence_length, 0)
            self.assertRaises(ValueError, tsinfer.generate_ancestors, sample_data)
github tskit-dev / tsinfer / tests / test_inference.py View on Github external
def test_path_compression_parent_child_identical_times(self):
        # This provoked a bug in which we created a pc ancestor
        # with the same time as its child, creating an invalid topology.
        sample_data = tsinfer.load("tests/data/bugs/invalid_pc_ancestor_time.samples")
        ts = tsinfer.infer(sample_data)
        for var, (_, genotypes) in zip(ts.variants(), sample_data.genotypes()):
            self.assertTrue(np.array_equal(var.genotypes, genotypes))
github tskit-dev / tsinfer / tests / test_cli.py View on Github external
)
        self.ancestor_trees = str(
            pathlib.Path(self.tempdir.name, "input-data.ancestors.trees")
        )
        self.output_trees = str(pathlib.Path(self.tempdir.name, "input-data.trees"))
        self.input_ts = msprime.simulate(
            10, mutation_rate=10, recombination_rate=10, random_seed=10
        )
        sample_data = tsinfer.SampleData(
            sequence_length=self.input_ts.sequence_length, path=self.sample_file
        )
        for var in self.input_ts.variants():
            sample_data.add_site(var.site.position, var.genotypes, var.alleles)
        sample_data.finalise()
        tsinfer.generate_ancestors(sample_data, path=self.ancestor_file, chunk_size=10)
        ancestor_data = tsinfer.load(self.ancestor_file)
        ancestors_ts = tsinfer.match_ancestors(sample_data, ancestor_data)
        ancestors_ts.dump(self.ancestor_trees)
        ts = tsinfer.match_samples(sample_data, ancestors_ts)
        ts.dump(self.output_trees)
        sample_data.close()
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_defaults_with_path(self):
        ts = get_example_ts(10, 10)
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            filename = os.path.join(tempdir, "samples.tmp")
            input_file = formats.SampleData(
                path=filename, sequence_length=ts.sequence_length
            )
            self.verify_data_round_trip(ts, input_file)
            compressor = formats.DEFAULT_COMPRESSOR
            for _, array in input_file.arrays():
                self.assertEqual(array.compressor, compressor)
            with tsinfer.load(filename) as other:
                self.assertEqual(other, input_file)
github mcveanlab / treeseq-inference / human-data / tsutil.py View on Github external
def run_combine_ukbb_1kg(args):
    ukbb_samples_file = "ukbb_{}.samples".format(args.chromosome)
    tg_ancestors_ts_file = "1kg_{}.trees".format(args.chromosome)
    ancestors_ts_file = "1kg_ukbb_{}.ancestors.trees".format(args.chromosome)
    samples_file = "1kg_ukbb_{}.samples".format(args.chromosome)

    ukbb_samples = tsinfer.load(ukbb_samples_file)
    tg_ancestors_ts = tskit.load(tg_ancestors_ts_file)
    print("Loaded ts:", tg_ancestors_ts.num_nodes, tg_ancestors_ts.num_edges)

    # Subset the sites down to the UKBB sites.
    tables = tg_ancestors_ts.dump_tables()
    ukbb_sites = set(ukbb_samples.sites_position[:])
    ancestors_sites = set(tables.sites.position[:])
    intersecting_sites = ancestors_sites & ukbb_sites

    print("Intersecting sites = ", len(intersecting_sites))
    tables.sites.clear()
    tables.mutations.clear()
    for site in tg_ancestors_ts.sites():
        if site.position in intersecting_sites:
            # Sites must be 0/1 for the ancestors ts.
            site_id = tables.sites.add_row(
github mcveanlab / treeseq-inference / human-data / bgen_to_samples.py View on Github external
parser.add_argument(
        "-n", "--use_first_n_samples", type=int,
        help="Only use the first n (diploid) samples from UK-biobank")
    parser.add_argument(
        "-p", "--progress", action="store_true",
        help="Show progress bars and output extra information when done")

    args = parser.parse_args()

    if not os.path.exists(args.bgen_file):
        raise ValueError("{} does not exist".format(args.bgen_file))
    if not os.path.exists(args.sampledata_file):
        raise ValueError("{} does not exist".format(args.sampledata_file))

    bgen = bgen_reader.read_bgen(args.bgen_file, verbose=False, size = 500)
    sample_data =  tsinfer.load(args.sampledata_file)
    create_datafile(sample_data, bgen, args.outfile, show_progress=args.progress)
github mcveanlab / treeseq-inference / src / run_tsinfer.py View on Github external
help="Instead of inferring ancestors, construct known ones from this tree sequence file path")
    parser.add_argument(
        "-V", "--version", action='version', version=description)
    

    args = parser.parse_args()
        
    if args.recombination_rate is not None:
        logging.warning("TSinfer now simply ignores recombination rate. You can omit this parameter")
    if args.error_probability is not None:
        logging.warning("TSinfer now simply ignores error probabilities. You can omit this parameter")
    engine = tsinfer.PY_ENGINE if args.method == "P" else tsinfer.C_ENGINE

    if not os.path.isfile(args.samples):
        raise ValueError("No samples file")
    sample_data = tsinfer.load(args.samples)
    if all(False for _ in sample_data.genotypes(inference_sites=True)):
        raise ValueError("No inference sites")
    if args.inject_real_ancestors_from_ts is not None:
        ancestor_data = tsinfer.AncestorData.initialise(sample_data, compressor=None)
        orig_ts = msprime.load(args.inject_real_ancestors_from_ts)
        eval_util.build_simulated_ancestors(sample_data, ancestor_data, orig_ts)
        ancestor_data.finalise()
        ancestors_ts = tsinfer.match_ancestors(
            sample_data, ancestor_data, path_compression=args.shared_recombinations, engine=engine)
        ts = tsinfer.match_samples(
            sample_data, ancestors_ts, path_compression=args.shared_recombinations, engine=engine, simplify=True)
    else:
        ts = tsinfer.infer(
            sample_data, num_threads=args.threads, path_compression=args.shared_recombinations, engine=engine)
    ts.dump(args.output)