How to use the tsinfer.SampleData.load function in tsinfer

To help you get started, we’ve selected a few tsinfer examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mcveanlab / treeseq-inference / src / run_old_tsinfer.py View on Github external
parser.add_argument(
        "-e", "--error-probability", default=0, type=float,
        help="The probability of observing an error")
    parser.add_argument(
        "-t", "--threads", default=1, type=int,
        help="The number of worker threads to use")
    parser.add_argument(
        "-m", "--method", default="C", choices=['C','P'],
        help="Which implementation to use, [C] (faster) or [P]ython (more debuggable)")
    parser.add_argument(
        "--inject-real-ancestors-from-ts", default=None,
        help="Instead of inferring ancestors, construct known ones from this tree sequence file path")
    
    args = parser.parse_args()
    try:
        sample_data = tsinfer.SampleData.load(args.samples)
    except dbm.error:
        raise ValueError("No samples file")
    if args.inject_real_ancestors_from_ts is not None:
        orig_ts = msprime.load(args.inject_real_ancestors_from_ts)
        ancestor_data = formats.AncestorData.initialise(sample_data, compressor=None)
        evaluation.build_simulated_ancestors(sample_data, ancestor_data, orig_ts)
        ancestor_data.finalise()
        ancestors_ts = tsinfer.match_ancestors(
            sample_data, ancestor_data, method=args.method, 
            path_compression=args.shared_recombinations)
        ts = tsinfer.match_samples(
            sample_data, ancestors_ts, method=args.method, 
            path_compression=args.shared_recombinations,
            simplify=True)
    else:
        ancestor_data = formats.AncestorData.initialise(sample_data, compressor=None)
github tskit-dev / tsinfer / tsinfer / cli.py View on Github external
def run_infer(args):
    """Run the full tsinfer inference pipeline on a sample-data file.

    Loads the samples named in ``args.samples``, infers a tree sequence
    (optionally multi-threaded), writes it to the resolved output path and
    reports resource usage.
    """
    setup_logging(args)
    samples = tsinfer.SampleData.load(args.samples)
    # A single monitor tracks all three phases of inference.
    monitor = ProgressMonitor(
        enabled=args.progress,
        generate_ancestors=True,
        match_ancestors=True,
        match_samples=True,
    )
    inferred_ts = tsinfer.infer(
        samples, progress_monitor=monitor, num_threads=args.num_threads
    )
    destination = get_output_trees_path(args.output_trees, args.samples)
    logger.info("Writing output tree sequence to {}".format(destination))
    inferred_ts.dump(destination)
    summarise_usage()
github mcveanlab / treeseq-inference / src / evaluation.py View on Github external
ret = self.__run_ARGweaver(skip_infer = metrics_only)
        elif self.tool == RENTPLUS:
            ret = self.__run_RentPlus(skip_infer = metrics_only)
        else:
            raise ValueError("unknown tool {}".format(self.tool))
        if self.inferred_filenames is None:
            logging.info("No inferred tree files so metrics skipped for {} row {} = {}".format(
                self.tool, int(self.row[0]), ret))
        else:
            for metric in self.metric_params:
            #NB Jerome thinks it may be clearer to have get_metrics() return a single set of metrics
            #rather than an average over multiple inferred nexus files, and do the averaging in python
                if metric & METRICS_LOCATION_VARIANTS:
                    #get positions from the samples store, for use in metric calcs
                    try:
                        positions = tsinfer.SampleData.load(path=self.cmp_fn + ".samples").sites_position[:].tolist()
                    except FileNotFoundError:
                        #no such file exists, could be a case with no sites
                        continue
                else:
                    positions = None
                source_nexus_file = self.cmp_fn + ".nex"
                inferred_nexus_files = [fn + ".nex" for fn in self.inferred_filenames]
                #here we should create a separate set of metrics for tsinfer with and without polytomy breaking
                #we should check if it is TSINFER, and then prepend '' for the default metric and ''
                if metric & METRICS_POLYTOMIES_BREAK:
                    metrics = []
                    for i in range(self.polytomy_reps):
                        metrics.append(ARG_metrics.get_metrics(
                            source_nexus_file, inferred_nexus_files, variant_positions = positions,
                            randomly_resolve_inferred = int(self.row.seed)+i*11))
                    metrics = pd.DataFrame(metrics).mean().to_dict()
github tskit-dev / tsinfer / tsinfer / cli.py View on Github external
def run_match_ancestors(args):
    """Match ancestral haplotypes and dump the resulting ancestors tree sequence.

    Resolves the ancestors and output paths from the CLI arguments, runs
    ``tsinfer.match_ancestors`` (path compression on unless explicitly
    disabled) and reports resource usage.
    """
    setup_logging(args)
    ancestors_path = get_ancestors_path(args.ancestors, args.samples)
    logger.info("Loading ancestral haplotypes from {}".format(ancestors_path))
    output_path = get_ancestors_trees_path(args.ancestors_trees, args.samples)
    samples = tsinfer.SampleData.load(args.samples)
    ancestors = tsinfer.AncestorData.load(ancestors_path)
    monitor = ProgressMonitor(enabled=args.progress, match_ancestors=True)
    # path_compression defaults to on; the CLI flag is a negative switch.
    ancestors_ts = tsinfer.match_ancestors(
        samples,
        ancestors,
        num_threads=args.num_threads,
        progress_monitor=monitor,
        path_compression=not args.no_path_compression,
    )
    logger.info("Writing ancestors tree sequence to {}".format(output_path))
    ancestors_ts.dump(output_path)
    summarise_usage()
github tskit-dev / tsinfer / tsinfer / cli.py View on Github external
def run_verify(args):
    """Verify a tree sequence against the sample data it was inferred from."""
    setup_logging(args)
    sample_data = tsinfer.SampleData.load(args.samples)
    tree_sequence = tskit.load(args.tree_sequence)
    monitor = ProgressMonitor(enabled=args.progress, verify=True)
    tsinfer.verify(sample_data, tree_sequence, progress_monitor=monitor)
    summarise_usage()
github tskit-dev / tsinfer / convert_hdf5.py View on Github external
def main(infile, outfile):
    """Export a tsinfer sample-data file as a plain HDF5 haplotype matrix.

    Reads the genotypes at the inference sites of ``infile`` and writes them
    to ``outfile`` under the "haplotypes" key, with one row per sample.
    """
    sample_data = tsinfer.SampleData.load(infile)
    print(sample_data)
    # Genotype matrix: one row per inference site, one column per sample.
    num_sites = sample_data.num_inference_sites
    num_samples = sample_data.num_samples
    G = np.empty((num_sites, num_samples), dtype=np.int8)
    for row, (_, genotypes) in enumerate(sample_data.genotypes(inference_sites=True)):
        G[row] = genotypes
    with h5py.File(outfile, "w") as root:
        # Transpose so that each stored row is one sample's haplotype.
        root["haplotypes"] = G.T
github mcveanlab / treeseq-inference / src / plots.py View on Github external
ret = self.__run_ARGweaver(skip_infer = metrics_only)
        elif self.tool == RENTPLUS:
            ret = self.__run_RentPlus(skip_infer = metrics_only)
        else:
            raise ValueError("unknown tool {}".format(self.tool))
        if self.inferred_filenames is None:
            logging.info("No inferred tree files so metrics skipped for {} row {} = {}".format(
                self.tool, int(self.row[0]), ret))
        else:
            for metric in self.metric_params:
            #NB Jerome thinks it may be clearer to have get_metrics() return a single set of metrics
            #rather than an average over multiple inferred nexus files, and do the averaging in python
                if metric & METRICS_LOCATION_VARIANTS:
                    #get positions from the samples store, for use in metric calcs
                    try:
                        positions = tsinfer.SampleData.load(path=self.cmp_fn + ".samples").sites_position[:].tolist()
                    except tsinfer.exceptions.FileFormatError:
                        #no such file exists, could be a case with no sites
                        continue
                else:
                    positions = None
                source_nexus_file = self.cmp_fn + ".nex"
                inferred_nexus_files = [fn + ".nex" for fn in self.inferred_filenames]
                #here we should create a separate set of metrics for tsinfer with and without polytomy breaking
                #we should check if it is TSINFER, and then prepend '' for the default metric and ''
                if metric & METRICS_POLYTOMIES_BREAK:
                    metrics = []
                    for i in range(self.polytomy_reps):
                        metrics.append(ARG_metrics.get_metrics(
                            source_nexus_file, inferred_nexus_files, variant_positions = positions,
                            randomly_resolve_inferred = int(self.row.seed)+i*11))
                    metrics = pd.DataFrame(metrics).mean().to_dict()