How to use the hail.struct function in hail

To help you get started, we’ve selected a few hail examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github macarthur-lab / gnomadjs / data / gnomad_sv / prepare_gnomad_svs.py View on Github external
xpos2=x_position(ds.info.CHR2, ds.info.POS2),
        # End 2
        end2=ds.info.END2,
        xend2=x_position(ds.info.CHR2, ds.info.END2),
        # Other
        length=ds.info.SVLEN,
        type=ds.info.SVTYPE,
        alts=ds.alleles[1:],
    )

    # MULTIALLELIC should not be used as a quality filter in the browser
    ds = ds.annotate(filters=ds.filters.difference(hl.set(["MULTIALLELIC"])))

    # Group gene lists for all consequences in a struct
    ds = ds.annotate(
        consequences=hl.struct(
            **{
                csq.lower(): ds.info[f"PROTEIN_CODING__{csq}"]
                for csq in PROTEIN_CODING_CONSEQUENCES
                if csq != "INTERGENIC" and csq != "NEAREST_TSS"
            }
        )
    )
    ds = ds.annotate(intergenic=ds.info.PROTEIN_CODING__INTERGENIC)

    # Collect set of all genes for which a variant has a consequence
    all_genes = hl.empty_array(hl.tstr)
    for csq in ds.consequences.dtype.fields:
        all_genes = all_genes.extend(hl.or_else(ds.consequences[csq.lower()], hl.empty_array(hl.tstr)))
    ds = ds.annotate(genes=hl.set(all_genes))

    # Group per-population values in a struct for each field
github macarthur-lab / gnomad_hail / gnomad / utils / gnomad_functions.py View on Github external
def add_variant_type(alt_alleles: hl.expr.ArrayExpression) -> hl.expr.StructExpression:
    """
    Build a Struct with variant_type and n_alt_alleles from an ArrayExpression of allele Strings.

    NOTE: despite its name, `alt_alleles` is indexed as the full allele list: element 0 is used
    as the reference allele and the remaining elements as the alternates. Alternates equal to
    '*' are dropped before both the classification and the count.

    :param alt_alleles: Array of all allele strings, reference first
    :return: Struct with `variant_type` (one of "snv", "multi-snv", "indel", "multi-indel",
        "mixed") and `n_alt_alleles` (number of non-'*' alternates)
    """
    reference = alt_alleles[0]
    real_alts = hl.filter(lambda allele: allele != '*', alt_alleles[1:])

    n_real_alts = hl.len(real_alts)
    has_multiple_alts = n_real_alts > 1

    only_snps = hl.all(lambda allele: hl.is_snp(reference, allele), real_alts)
    only_indels = hl.all(lambda allele: hl.is_indel(reference, allele), real_alts)

    # The SNP test takes precedence; the indel test is only consulted when it fails.
    indel_or_mixed = hl.cond(
        only_indels,
        hl.cond(has_multiple_alts, "multi-indel", "indel"),
        "mixed",
    )
    variant_type = hl.cond(
        only_snps,
        hl.cond(has_multiple_alts, "multi-snv", "snv"),
        indel_or_mixed,
    )

    return hl.struct(variant_type=variant_type, n_alt_alleles=n_real_alts)
github macarthur-lab / gnomadjs / data / gnomad_r2_1 / prepare_gnomad_mnvs_for_browser.py View on Github external
mnv_in_genome=ds.constituent_snvs.all(lambda s: hl.is_defined(s.genome)),
    )

    ds = ds.transmute(
        n_individuals=ds.n_indv_tnv,
        ac=ds.AC_tnv,
        ac_hom=ds.n_tnv_hom,
        exome=hl.or_missing(
            ds.mnv_in_exome,
            hl.struct(
                n_individuals=ds.n_indv_tnv_ex, ac=ds.AC_tnv_ex, ac_hom=ds.n_tnv_hom_ex
            ),
        ),
        genome=hl.or_missing(
            ds.mnv_in_genome,
            hl.struct(
                n_individuals=ds.n_indv_tnv_gen,
                ac=ds.AC_tnv_gen,
                ac_hom=ds.n_tnv_hom_gen,
            ),
        ),
    )

    ds = ds.drop("AC_snp1", "AC_snp2", "AC_snp3")

    ds = ds.transmute(
        consequence=hl.struct(
            category=hl.null(hl.tstr),
            gene_id=ds.gene_id,
            gene_name=ds.gene_name,
            transcript_id=ds.transcript_id,
            consequence=ds.tnv_cons,
github macarthur-lab / gnomadjs / data / exac / export_exac_vcf_to_ht.py View on Github external
                    lambda csq_values: hl.struct(
                        **{
                            field: hl.cond(csq_values[index] == "", hl.null(hl.tstr), csq_values[index])
                            for index, field in enumerate(VEP_FIELDS)
                        }
github hail-is / hail / hail / python / hail / matrixtable.py View on Github external
:meth:`.result`

        Parameters
        ----------
        named_exprs : varargs of :class:`.Expression`
            Aggregation expressions.

        Returns
        -------
        :class:`.GroupedMatrixTable`
        """
        if self._row_keys is not None:
            raise NotImplementedError("GroupedMatrixTable is already grouped by rows. Cannot aggregate over cols.")
        assert self._col_keys is not None

        base = self._col_fields if self._col_fields is not None else hl.struct()
        for k, e in named_exprs.items():
            analyze('GroupedMatrixTable.aggregate_cols', e, self._parent._global_indices, {self._parent._col_axis})

        self._check_bindings('aggregate_cols', named_exprs, self._parent._col_indices)
        return self._copy(col_fields = base.annotate(**named_exprs))
github macarthur-lab / gnomadjs / data / gnomad_r2_1 / prepare_gnomad_r2_1_variants.py View on Github external
# Subset labels contain an _, so rebuild those after splitting them
        if labels[0] == "non":
            labels = ["_".join(labels[0:2])] + labels[2:]

        if len(labels) == 2:
            [subset, pop] = labels
            faf_index_tree[subset][pop] = index
        else:
            assert len(labels) == 1
            subset = labels[0]
            faf_index_tree[subset]["total"] = index

    ds = ds.annotate(
        **{
            subset: ds[subset].annotate(
                faf95_adj=hl.struct(**{pop: ds.faf[index].faf95 for pop, index in faf_index_tree[subset].items()}),
                faf99_adj=hl.struct(**{pop: ds.faf[index].faf99 for pop, index in faf_index_tree[subset].items()}),
            )
            for subset in subsets
        }
    )

    ds = ds.drop("freq", "popmax", "faf")

    ##############
    # Histograms #
    ##############

    # Extract overall age distribution
    ds = ds.transmute(
        gnomad_age_hist_het=ds.age_hist_het[g.age_index_dict["gnomad"]],
        gnomad_age_hist_hom=ds.age_hist_hom[g.age_index_dict["gnomad"]],
github macarthur-lab / gnomad_hail / utils / generic.py View on Github external
                .map(lambda m: hl.struct(m=m[0], f=f[0])),
            hl.zip_with_index(father_v)
github macarthur-lab / gnomadjs / data / prepare_pext_for_browser.py View on Github external
output_region(current_region._replace(stop=last_pos))


#
# Step 3: Convert regions to a Hail table.
#
# Column types for import: every tissue column is a float; the fixed columns are
# listed afterwards so they take precedence if a name ever coincides.
types = {
    **dict.fromkeys(tissue_fields, hl.tfloat),
    "gene_id": hl.tstr,
    "chrom": hl.tstr,
    "start": hl.tint,
    "stop": hl.tint,
    "mean": hl.tfloat,
}

ds = hl.import_table(
    "regions.tsv",
    min_partitions=100,
    missing="",
    types=types,
)

# Keep the fixed columns as-is and gather all per-tissue columns into one `tissues` struct.
ds = ds.select(
    "gene_id",
    "chrom",
    "start",
    "stop",
    "mean",
    tissues=hl.struct(**{t: ds[t] for t in tissue_fields}),
)

ds.write(args.output_ht)
github hail-is / hail / hail / python / hail / experimental / sparse_split_multi.py View on Github external
return hl.bind(lambda mr:
                       (hl.case()
                        .when(ds.locus == mr.locus,
                              hl.struct(
                                  locus=ds.locus,
                                  alleles=[mr.alleles[0], mr.alleles[1]],
                                  a_index=i,
                                  was_split=True))
                        .when(filter_changed_loci,
                              hl.null(hl.tstruct(locus=ds.locus.dtype, alleles=hl.tarray(hl.tstr),
                                                 a_index=hl.tint, was_split=hl.tbool)))
                        .or_error(
                            "Found non-left-aligned variant in sparse_split_multi\n"
                            + "old locus: " + hl.str(ds.locus) + "\n"
                            + "old ref  : " + ds.alleles[0] + "\n"
                            + "old alt  : " + ds.alleles[i] + "\n"
                            + "mr locus : " + hl.str(mr.locus) + "\n"
                            + "mr ref   : " + mr.alleles[0] + "\n"
                            + "mr alt   : " + mr.alleles[1]
                            )),
github macarthur-lab / gnomadjs / data / prepare_gnomad_r2_1_constraint_for_browser.py View on Github external
ds = ds.select_globals()

# Fields whose values are per-population dicts
population_dict_fields = ["pop_no_lofs", "pop_obs_het_lof", "pop_obs_hom_lof", "pop_defined", "pop_p"]

populations = [
    "afr",
    "amr",
    "asj",
    "eas",
    "fin",
    "nfe",
    "oth",
    "sas",
]

# Elasticsearch export needs structs: rebuild each per-population dict as a struct with
# one field per population, and flatten the interval into chrom/start/end.
ds = ds.annotate(
    **{
        field: hl.struct(**{pop: ds[field][pop] for pop in populations})
        for field in population_dict_fields
    },
    interval=hl.struct(
        chrom=ds.interval.start.contig,
        start=ds.interval.start.position,
        end=ds.interval.end.position,
    ),
)

# Re-key with no key fields, then rename gene -> gene_name and transcript -> transcript_id.
ds = ds.key_by()
ds = ds.transmute(gene_name=ds.gene, transcript_id=ds.transcript)