How to use the msprime.load function in msprime

To help you get started, we’ve selected a few msprime examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mcveanlab / treeseq-inference / scripts / selection_tests.py View on Github external
recombination_rate=1e-6
mutation_rate=1e-6
ts = msprime.simulate(10000, 
        Ne=5000, length=10000, recombination_rate=recombination_rate)        
#or try generating an identical neutral ftprime simulation using 
# python3 ./examples/examples.py -T 10000 -N 5000 -r 1e-6 -L 10000 -a 0.0001 -b 0.0001 -k 5000 -t neutral_ts
T_W, T_pi, T_H, D, FayWuH = [], [], [], [], []
incrementalSFS(ts, save_estimates)
plt = ax1
plt.plot(*zip(*D), marker="")
plt.plot(*zip(*FayWuH), marker="")
plt.legend(["Tajima's D", "Fay & Wu's H"])
plt.set_xlim(0,ts.get_sequence_length())
plt.set_title("msprime simulation")
#plt.set_ylim(-15000,15000)
ts = msprime.load("../ftprime/neutral_ts")
T_W, T_pi, T_H, D, FayWuH = [], [], [], [], []
incrementalSFS(ts, save_estimates)
plt = ax2
plt.plot(*zip(*D), marker="")
plt.plot(*zip(*FayWuH), marker="")
plt.legend(["Tajima's D", "Fay & Wu's H"])
plt.set_xlim(0,ts.get_sequence_length())
plt.set_title("ftprime equivalent (after 1000 generations)")
fig.savefig("SelectionMeasuresOnNeutral.pdf")

#selective example
#e.g. from python3 ./examples/selective_sweep.py -N 10000 -r 1e-8 -L 100000 -k 10000 -s 0.05 -of 0.1 -of 0.5 -of 0.8 -of 1 -of 1 200 -of 1 600 -d 20 -v
fig, ((ax1, ax2, ax3),(ax4, ax5,ax6)) = pyplot.subplots(2, 3, sharey=True, figsize=(21, 11))

for f1, f2, plt in zip(
    ["0.1","0.5","0.8","1","1","1"],
github tskit-dev / msprime / tests / test_simulate_from.py View on Github external
def test_single_locus_example_recombination(self):
        from_ts = msprime.load("tests/data/SLiM/single-locus-example.trees")
        ts = self.finish_simulation(from_ts, recombination_rate=0.1, seed=1)
        self.verify_completed(from_ts, ts)
github tskit-dev / msprime / tests / test_highlevel.py View on Github external
def verify_dump_load(self, tree_sequence):
        """
        Dump the tree sequence and verify we can load again from the same
        file.
        """
        tree_sequence.dump(self.temp_file)
        other = msprime.load(self.temp_file)
        self.assertIsNotNone(other.file_uuid)
        records = list(tree_sequence.edges())
        other_records = list(other.edges())
        self.assertEqual(records, other_records)
        haplotypes = list(tree_sequence.haplotypes())
        other_haplotypes = list(other.haplotypes())
        self.assertEqual(haplotypes, other_haplotypes)
github armartin / ancestry_pipeline / simulate_prs.py View on Github external
def main(args):
    nhaps = map(int, args.nhaps.split(','))
    recomb = args.recomb_map
    ncausal = args.ncausal
    
    # generate/load coalescent simulations
    if args.tree is None:
        (pop_config, mig_mat, demog) = out_of_africa(nhaps)
        simulation = simulate_ooa(pop_config, mig_mat, demog, recomb)
        simulation.dump(args.out+ '_nhaps_' + '_'.join(map(str, nhaps)) + '.hdf5', True)
    else:
        simulation = msprime.load(args.tree)
    
    eprint(simulation)
    eprint('Number of haplotypes: ' + ','.join(map(str, nhaps)))
    eprint('Number of trees: ' + str(simulation.get_num_trees()))
    eprint('Number of mutations: ' + str(simulation.get_num_mutations()))
    eprint('Sequence length: ' + str(simulation.get_sequence_length()))

    
    prs_true = true_prs(simulation, args.ncausal, args.h2, nhaps, args.out)
    cases_diploid, controls_diploid, prs_norm, environment = case_control(prs_true, args.h2, nhaps, args.prevalence, args.ncontrols, args.out)
    summary_stats, cases_haploid, controls_haploid = run_gwas(simulation, cases_diploid, controls_diploid, args.p_threshold, args.cc_maf)
    clumped_snps, usable_positions = clump_variants(simulation, summary_stats, nhaps, args.r2, args.window_size)
    prs_infer = infer_prs(simulation, nhaps, clumped_snps, summary_stats, usable_positions, args.h2, args.ncausal, args.out)
    write_summaries(args.out, prs_true, prs_infer, nhaps, cases_diploid, controls_diploid, args.h2, args.ncausal, environment)
github mcveanlab / treeseq-inference / src / ts_ARGweaver.py View on Github external
def tsfile_to_ARGweaver_in(trees, ARGweaver_filehandle):
    """
    take a .trees file, and convert it into an input file suitable for ARGweaver
    Returns the simulation parameters (Ne, mu, r) used to create the .trees file
    """
    logging.info("== Saving to ARGweaver input format ==")
    try:
        ts = msprime.load(trees.name) #trees is a fh
    except AttributeError:
        ts = msprime.load(trees)
    ts_to_ARGweaver_in(ts, ARGweaver_filehandle)
    #here we should extract the /provenance information from the .trees file and return 
    # {'Ne':XXX, 'mutation_rate':XXX, 'recombination_rate':XXX}
    #but this information is currently not encoded in the .trees file (listed as TODO)

    return {'Ne':None, 'mutation_rate':None, 'recombination_rate':None}
github tskit-dev / tsinfer / dev.py View on Github external
def build_profile_inputs(n, num_megabases):
    L = num_megabases * 10 ** 6
    input_file = "tmp__NOBACKUP__/profile-n={}-m={}.input.trees".format(
        n, num_megabases
    )
    if os.path.exists(input_file):
        ts = msprime.load(input_file)
    else:
        ts = msprime.simulate(
            n,
            length=L,
            Ne=10 ** 4,
            recombination_rate=1e-8,
            mutation_rate=1e-8,
            random_seed=10,
        )
        print(
            "Ran simulation: n = ",
            n,
            " num_sites = ",
            ts.num_sites,
            "num_trees =",
            ts.num_trees,
github tskit-dev / msprime / dev.py View on Github external
msprime.PopulationConfiguration(sample_size=1000)],
            demographic_events=[
                msprime.MassMigration(time=t, source=1, destination=0),
                msprime.MassMigration(time=t, source=2, destination=0),
                msprime.MassMigration(time=t, source=3, destination=0),
                msprime.MassMigration(time=t, source=4, destination=0)],
            length=100 * 1e6,
            recombination_rate=2e-8,
            mutation_rate=2e-8,
            random_seed=1)
        ts.dump("populations.hdf5")
        print(
            ts.get_sample_size(), ts.get_num_trees(),
            ts.get_num_mutations())
    else:
        ts = msprime.load("populations.hdf5")
        before = time.clock()
        R = 1
        for i in range(R):
            for j in range(5):
                samples = ts.get_samples(population_id=j)
                pi = ts.get_pairwise_diversity(samples)
                # pi2 = ts.get_pairwise_diversity2(samples)
                # print(j, pi, pi2, pi == pi2)
                # print(j, pi2)
        duration = time.clock() - before
        print("duration = ", duration, " per call = ", duration / (5 * R))
github mcveanlab / treeseq-inference / src / plots.py View on Github external
shared_recombinations, num_threads=1, inject_real_ancestors_from_ts_fn=None, rho=None, error_probability=None):
            with tempfile.NamedTemporaryFile("w+") as ts_out:
                cmd = [sys.executable, tsinfer_executable, sample_fn, "--length", str(int(length))]
                if rho is not None:
                    cmd += ["--recombination-rate", str(rho)]
                if error_probability is not None:
                    cmd += ["--error-probability", str(error_probability)]
                cmd += ["--threads", str(num_threads), ts_out.name]
                if inject_real_ancestors_from_ts_fn:
                    logging.debug("Injecting real ancestors constructed from {}".format(
                        inject_real_ancestors_from_ts_fn))
                    cmd.extend(["--inject-real-ancestors-from-ts", inject_real_ancestors_from_ts_fn])
                if shared_recombinations:
                    cmd.append("--shared-recombinations")
                cpu_time, memory_use = time_cmd(cmd)
                ts_simplified = msprime.load(ts_out.name)
            return ts_simplified, cpu_time, memory_use
github tskit-dev / msprime / dev.py View on Github external
def examine():
    ts = msprime.load("tmp__NOBACKUP__/bottleneck-example.hdf5")
    print("num_records = ", ts.get_num_records())
    non_binary_records = 0
    max_record_length = 0
    for r in ts.records():
        if len(r.children) > 2:
            non_binary_records +=1
            max_record_length = max(max_record_length, len(r.children))
    print("non_binary_records = ", non_binary_records)
    print("max_record_length = ", max_record_length)
    num_nodes = collections.Counter()
    num_trees = 0
    for t in ts.trees():
        num_nodes[len(list(t.nodes(t.get_root())))] += 1
        num_trees += 1
    print("num_trees = ", num_trees)
    for k, v in num_nodes.items():
github mcveanlab / treeseq-inference / scripts / plot_inferred_1000G_trees_for_movie.py View on Github external
for node in tree.nodes():
        if node in colours:
            for c in tree.get_children(node):
                ret_colours[c]=colours[node]
    return ret_colours

#from http://www.internationalgenome.org
pop_cols = {"African":"yellow", "American": "red", "East Asian":"green","European":"blue", "South Asian":"purple"}
sample_cols = {}
with open("/Volumes/SDdisk/1000G/igsr_samples.tsv") as tsvfile:
    reader = csv.DictReader(tsvfile, delimiter='\t')
    for row in reader:
        if 'phase 1' in row['Data collections']:
            sample_cols[row['Sample name']]= pop_cols[row['Superpopulation name']]

ts = msprime.load(args.msprime_infile).simplify()
#sample names are not stored in msprime files, so we need to get them from the base file
#sample names in the base file have an extra character ('a' or 'b') appended
with h5py.File(args.base_file, "r") as f:
    data = f['data']
    samples = data['samples']
    sample_colours = {i:sample_cols[str(d, 'utf8')[:-1]] for i,d in enumerate(data['samples']) if str(d, 'utf8')[-1] in 'ab'}

for i, t in enumerate(ts.trees()):
    if i==0:
        start = t.get_interval()[1]
    branch_colours = {},{}
    node_colours=sample_colours.copy()
    percolate_unambiguous_colours(t, node_colours, t.get_root(), )
    branch_colours = colour_children(t, node_colours)
    file = os.path.join(args.outdir,"tree{:05d}.svg".format(i))
    svg=t.draw(file,