How to use the hail.tarray function in hail

To help you get started, we’ve selected a few hail examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hail-is / hail / hail / python / scripts / drive_combiner.py View on Github external
'The sample map should be tab separated with two columns. '
                             'The first column is the sample ID, and the second column '
                             'is the gVCF path.\n'
                             'WARNING: the sample names in the gVCFs will be overwritten',
                        required=True)
    parser.add_argument('--tmp-path', help='path to folder for temp output (can be a cloud bucket)',
                        default='/tmp')
    parser.add_argument('--out-file', '-o', help='path to final combiner output', required=True)
    parser.add_argument('--json', help='json to use for the import of the gVCFs'
                                       '(must be filesystem local)', required=True)
    parser.add_argument('--header', help='external header, must be cloud based', required=False)
    args = parser.parse_args()
    hl.init(default_reference=DEFAULT_REF,
            log='/hail-joint-caller-' + time.strftime('%Y%m%d-%H%M') + '.log')
    with open(args.json) as j:
        ty = hl.tarray(hl.tinterval(hl.tstruct(locus=hl.tlocus(reference_genome='GRCh38'))))
        intervals = ty._from_json(j.read())
    with open(args.sample_map) as m:
        samples = [l.strip().split('\t') for l in m]
    run_combiner(samples, intervals, args.out_file, args.tmp_path, args.header, overwrite=True)
github hail-is / hail / hail / python / hail / expr / generic_summary.py View on Github external
elif isinstance(t, hl.tlocus):
            contig_counts = append_agg(c, hl.agg.filter(hl.is_defined(expr), hl.agg.counter(expr.contig)))
            d['contig counts'] = lambda results: format(index_with_path(results, contig_counts))
        elif isinstance(t, (hl.tset, hl.tdict, hl.tarray)):
            size = append_agg(c, hl.agg.stats(hl.len(expr)))
            d['minimum size'] = lambda results: format(map_int(results[size]['min']))
            d['maximum size'] = lambda results: format(map_int(results[size]['max']))
            d['mean size'] = lambda results: format(results[size]['mean'])
        to_print.append((context, d))
        if isinstance(t, hl.ttuple):
            for i in range(len(expr)):
                recur_expr(expr[i], f'{context}[{i}]', path, c)
        if isinstance(t, hl.tstruct):
            for k, v in expr.items():
                recur_expr(v, f'{context}[{repr(k)}]', path, c)
        if isinstance(t, (hl.tset, hl.tarray)):
            def explode_f(x):
                c2 = Computations()
                new_path = path + (c.n,)
                recur_expr(x, f'{context}[]', new_path, c2)
                return c2.result()

            append_agg(c, hl.agg.explode(explode_f, expr))
        if isinstance(t, hl.tdict):
            def explode_f(x):
                c2 = Computations()
                new_path = path + (c.n,)
                recur_expr(x[0], f'{context}[]', new_path, c2)
                recur_expr(x[1], f'{context}[]', new_path, c2)
                return c2.result()

            append_agg(c, hl.agg.explode(explode_f, hl.array(expr)))
github hail-is / hail / hail / python / hail / ir / matrix_reader.py View on Github external
def __init__(self, path, intervals, filter_intervals):
        """Set up a reader for ``path``, optionally restricted to ``intervals``.

        :param path: Stored on the instance as ``self.path``.
        :param intervals: ``None``, or a collection whose imputed Hail type
            is an array of intervals.  When the interval point type is not
            already a struct, each interval is re-wrapped so that its
            endpoints become ``hl.Struct(__point=...)`` values.
        :param filter_intervals: Stored on the instance as
            ``self.filter_intervals``.
        :raises TypeError: If ``intervals`` is given and its imputed type is
            not an array of intervals.

        Note that ``self._interval_type`` is only assigned when ``intervals``
        is not ``None``.
        """
        if intervals is not None:
            t = hl.expr.impute_type(intervals)
            # Reject the input when EITHER check fails.  The previous `and`
            # only raised when both failed, so an array of non-intervals
            # slipped through to `.point_type` below, and a non-array type
            # had `.element_type` read from it before any TypeError could
            # be raised.  With `or`, the element type is inspected only
            # once `t` is known to be an array.
            if not isinstance(t, hl.tarray) or not isinstance(t.element_type, hl.tinterval):
                raise TypeError("'intervals' must be an array of tintervals")
            pt = t.element_type.point_type
            if isinstance(pt, hl.tstruct):
                # Points are already structs: use the imputed type as is.
                self._interval_type = t
            else:
                # Wrap the bare point type in a single-field struct.
                self._interval_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt)))

        self.path = path
        self.filter_intervals = filter_intervals
        if intervals is not None and t != self._interval_type:
            # The type was rewritten above, so rewrite the values to match:
            # each endpoint becomes a struct with a `__point` field.
            self.intervals = [hl.Interval(hl.Struct(__point=i.start),
                                          hl.Struct(__point=i.end),
                                          i.includes_start,
                                          i.includes_end) for i in intervals]
        else:
            self.intervals = intervals
github hail-is / hail / hail / python / hail / methods / impex.py View on Github external
The ``includes_start`` and ``includes_end`` keys must be ``True``. The
    ``contig`` fields must be the same.

    One difference between :func:`.import_gvcfs` and :func:`.import_vcf` is that
    :func:`.import_gvcfs` only keys the resulting matrix tables by ``locus``
    rather than ``locus, alleles``.
    """

    rg = reference_genome.name if reference_genome else None

    global _cached_importvcfs
    if _cached_importvcfs is None:
        _cached_importvcfs = Env.hail().io.vcf.ImportVCFs

    if partitions is not None:
        partitions, partitions_type = hl.utils._dumps_partitions(partitions, hl.tstruct(locus=hl.tlocus(rg), alleles=hl.tarray(hl.tstr)))
    else:
        partitions_type = None

    vector_ref_s = _cached_importvcfs.pyApply(
        wrap_to_list(path),
        wrap_to_list(call_fields),
        entry_float_type._parsable_string(),
        rg,
        contig_recoding,
        array_elements_required,
        skip_invalid_loci,
        force_bgz,
        force,
        partitions, partitions_type._parsable_string(),
        filter,
        find_replace[0] if find_replace is not None else None,
github hail-is / hail / hail / python / hail / ir / register_functions.py View on Github external
def array_floating_point_divide(arg_type, ret_type):
        """Register the three array overloads of ``"/"`` for ``arg_type``.

        The overloads are, in registration order: scalar / array,
        array / scalar and array / array.  Each one is registered with a
        return type of ``hl.tarray(ret_type)``.
        """
        operand_signatures = (
            (arg_type, hl.tarray(arg_type)),
            (hl.tarray(arg_type), arg_type),
            (hl.tarray(arg_type), hl.tarray(arg_type)),
        )
        for operands in operand_signatures:
            register_function("/", operands, hl.tarray(ret_type))
    array_floating_point_divide(hl.tint32, hl.tfloat32)
github hail-is / hail / hail / python / hail / ir / table_ir.py View on Github external
(child_typ.row_key_type
                 ._insert_fields(**{f: child_typ.row_type[f] for f in pass_through})
                 ._concat(poisreg_type)),
                child_typ.row_key)
        elif name == 'Skat':
            key_field = self.config['keyField']
            key_type = child_typ.row_type[key_field]
            skat_type = hl.dtype(f'struct{{id:{key_type},size:int32,q_stat:float64,p_value:float64,fault:int32}}')
            self._type = hl.ttable(
                hl.tstruct(),
                skat_type,
                ['id'])
        elif name == 'PCA':
            self._type = hl.ttable(
                hl.tstruct(eigenvalues=hl.tarray(hl.tfloat64),
                           scores=hl.tarray(child_typ.col_key_type._insert_field('scores', hl.tarray(hl.tfloat64)))),
                child_typ.row_key_type._insert_field('loadings', dtype('array')),
                child_typ.row_key)
        elif name == 'IBD':
            ibd_info_type = hl.tstruct(Z0=hl.tfloat64, Z1=hl.tfloat64, Z2=hl.tfloat64, PI_HAT=hl.tfloat64)
            ibd_type = hl.tstruct(i=hl.tstr,
                                  j=hl.tstr,
                                  ibd=ibd_info_type,
                                  ibs0=hl.tint64,
                                  ibs1=hl.tint64,
                                  ibs2=hl.tint64)
            self._type = hl.ttable(
                hl.tstruct(),
                ibd_type,
                ['i', 'j'])
        else:
            assert name == 'LocalLDPrune', name
github hail-is / hail / hail / python / hail / ir / matrix_ir.py View on Github external
def _compute_type(self):
        """Compute and store ``self._type`` from the child's matrix type.

        The result reuses every component of the child's type except the
        row type, which gains a field named ``self.root``.  That field holds
        the table's value type, wrapped in an array when ``self.product``
        is truthy.
        """
        source = self.child.typ
        # With `product` set the new field is an array of table values;
        # otherwise it is a single table value.
        annotation_type = (hl.tarray(self.table.typ.value_type)
                           if self.product
                           else self.table.typ.value_type)
        globals_type = source.global_type
        cols_type = source.col_type
        cols_key = source.col_key
        rows_type = source.row_type._insert_field(self.root, annotation_type)
        rows_key = source.row_key
        entries_type = source.entry_type
        self._type = hl.tmatrix(globals_type, cols_type, cols_key,
                                rows_type, rows_key, entries_type)
github hail-is / hail / hail / python / hail / ir / matrix_reader.py View on Github external
def __init__(self, path, intervals, filter_intervals):
        """Set up a reader for ``path``, optionally restricted to ``intervals``.

        :param path: Stored on the instance as ``self.path``.
        :param intervals: ``None``, or a collection whose imputed Hail type
            is an array of intervals.  When the interval point type is not
            already a struct, each interval is re-wrapped so that its
            endpoints become ``hl.Struct(__point=...)`` values.
        :param filter_intervals: Stored on the instance as
            ``self.filter_intervals``.
        :raises TypeError: If ``intervals`` is given and its imputed type is
            not an array of intervals.

        Note that ``self._interval_type`` is only assigned when ``intervals``
        is not ``None``.
        """
        if intervals is not None:
            t = hl.expr.impute_type(intervals)
            # Reject the input when EITHER check fails.  The previous `and`
            # only raised when both failed, so an array of non-intervals
            # slipped through to `.point_type` below, and a non-array type
            # had `.element_type` read from it before any TypeError could
            # be raised.  With `or`, the element type is inspected only
            # once `t` is known to be an array.
            if not isinstance(t, hl.tarray) or not isinstance(t.element_type, hl.tinterval):
                raise TypeError("'intervals' must be an array of tintervals")
            pt = t.element_type.point_type
            if isinstance(pt, hl.tstruct):
                # Points are already structs: use the imputed type as is.
                self._interval_type = t
            else:
                # Wrap the bare point type in a single-field struct.
                self._interval_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt)))

        self.path = path
        self.filter_intervals = filter_intervals
        if intervals is not None and t != self._interval_type:
            # The type was rewritten above, so rewrite the values to match:
            # each endpoint becomes a struct with a `__point` field.
            self.intervals = [hl.Interval(hl.Struct(__point=i.start),
                                          hl.Struct(__point=i.end),
                                          i.includes_start,
                                          i.includes_end) for i in intervals]
        else:
            self.intervals = intervals
github macarthur-lab / gnomad_hail / utils / generic.py View on Github external
"""
        Returns phased genotype calls in the non-PAR region of Y (requires both father and proband to be haploid to return phase)

        :param CallExpression proband_call: Input proband genotype call
        :param CallExpression father_call: Input father genotype call
        :return: Array containing: phased proband call, phased father call, phased mother call
        :rtype: ArrayExpression
        """
        return hl.cond(
            proband_call.is_haploid() & father_call.is_haploid() & (father_call[0] == proband_call[0]),
            hl.array([
                hl.call(proband_call[0], phased=True),
                hl.call(father_call[0], phased=True),
                hl.null(hl.tcall)
            ]),
            hl.null(hl.tarray(hl.tcall))
        )