How to use the zarr.DBMStore class in zarr

To help you get started, we’ve selected a few zarr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tskit-dev / tsinfer / vcf2tsinfer.py View on Github external
genetic_pos = genetic_map.physical_to_genetic(physical_pos)
                physical_dist = physical_pos - last_physical_pos
                genetic_dist = genetic_pos - last_genetic_pos
                scaled_recomb_rate = 0
                if genetic_dist > 0:
                    scaled_recomb_rate = physical_dist / genetic_dist
                recombination_rates[i]=scaled_recomb_rate
        except FileNotFoundError:
            print("Genetic map file {} not found, defaulting to constant recombination rate of {}".format(
                genetic_map_file, args.recombination_rate))


    output_file = args.outfile + str(c) + ".tsinf"
    if os.path.exists(output_file):
        os.unlink(output_file)
    input_hdf5 = zarr.DBMStore(output_file, open=bsddb3.btopen)
    root = zarr.group(store=input_hdf5, overwrite=True)
    tsinfer.InputFile.build(
        root, 
        genotypes=sites_by_samples,
        position=list(dat['position'].keys()),
        recombination_rate=recombination_rates)
    #sample_names=[s.encode() for s in reduced_rows]
    input_hdf5.close()
    print("Saved {} biallelic loci for {} samples into {}".format(len(dat['position']), len(reduced_rows), output_file))


"""
Then do something like
github tskit-dev / tsinfer / run_tsinf.py View on Github external
'if more than one input file, will append ".0", ".1", ".2" etc to the name.')
parser.add_argument('-P', '--progress', action='store_true',
                    help='Show a progress bar.')
args = parser.parse_args()

# Defaults passed to every tsinfer call below.
method, path_compression, simplify = "C", True, True

# Run the tsinfer pipeline (build ancestors -> match ancestors -> match
# samples) once per input file and dump each resulting tree sequence.
for i, fn in enumerate(args.infiles):
    # A numeric suffix is only needed when several inputs share one --outfile;
    # without --outfile the output name is derived from the input name instead.
    ext = ('.' + str(i)) if len(args.infiles) > 1 else ''
    if args.outfile:
        out_fn = args.outfile + ext
    else:
        out_fn = os.path.splitext(fn)[0] + '.hdf5'
    if not os.path.isfile(fn):
        # Same exception type as before, but now naming the offending path.
        raise FileNotFoundError("Input file {} not found".format(fn))

    input_hdf5 = zarr.DBMStore(fn, open=bsddb3.btopen)
    try:
        input_root = zarr.group(store=input_hdf5)

        # No store is passed here, so zarr falls back to its default store
        # for the intermediate ancestors data.
        ancestors_root = zarr.group()
        tsinfer.build_ancestors(
            input_root, ancestors_root, method=method, chunk_size=16,
            compress=False, progress=args.progress)
        ancestors_ts = tsinfer.match_ancestors(
            input_root, ancestors_root, method=method,
            path_compression=path_compression, progress=args.progress)
        full_inferred_ts = tsinfer.match_samples(
            input_root, ancestors_ts, method=method,
            path_compression=path_compression, simplify=simplify,
            progress=args.progress)
        full_inferred_ts.dump(out_fn)
    finally:
        # Always release the underlying database handle, even if inference
        # fails part-way through; otherwise one handle leaks per input file.
        input_hdf5.close()