How to use the hail.or_else function in hail

To help you get started, we've selected a few hail examples based on popular ways hail.or_else is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.

Example from macarthur-lab/gnomad_hail — gnomad/utils/vep.py (view on GitHub)
+ hl.format("%.3f", element.sift_score)
                    + ")",
                    "polyphen": element.polyphen_prediction
                    + "("
                    + hl.format("%.3f", element.polyphen_score)
                    + ")",
                    "domains": hl.delimit(
                        element.domains.map(lambda d: d.db + ":" + d.name), "&"
                    ),
                }
            )
        elif feature_type == "MotifFeature":
            fields["motif_score_change"] = hl.format("%.3f", element.motif_score_change)

        return hl.delimit(
            [hl.or_else(hl.str(fields.get(f, "")), "") for f in _csq_fields], "|"
        )
Example from macarthur-lab/gnomad_hail — gnomad/utils/vep.py (view on GitHub)
elif feature_type == "MotifFeature":
            fields["motif_score_change"] = hl.format("%.3f", element.motif_score_change)

        return hl.delimit(
            [hl.or_else(hl.str(fields.get(f, "")), "") for f in _csq_fields], "|"
        )

    csq = hl.empty_array(hl.tstr)
    for feature_field, feature_type in [
        ("transcript_consequences", "Transcript"),
        ("regulatory_feature_consequences", "RegulatoryFeature"),
        ("motif_feature_consequences", "MotifFeature"),
        ("intergenic_consequences", "Intergenic"),
    ]:
        csq = csq.extend(
            hl.or_else(
                vep_expr[feature_field].map(
                    lambda x: get_csq_from_struct(x, feature_type=feature_type)
                ),
                hl.empty_array(hl.tstr),
            )
        )

    return hl.or_missing(hl.len(csq) > 0, csq)
Example from macarthur-lab/gnomadjs — projects/exome-results-browsers/browsers/schema/data/prepare_variant_results.py (view on GitHub)
transcript_id=hl.delimit(variants.transcript_id, ","),
        hgvsc=hl.delimit(variants.hgvsc.keys().map(lambda k: k + ":" + variants.hgvsc[k]), ","),
        hgvsp=hl.delimit(variants.hgvsp.keys().map(lambda k: k + ":" + variants.hgvsp[k]), ","),
    )

    variants = variants.annotate(flags="PASS")
    variants = variants.drop("v")

    results = hl.read_table(args.results)
    results = results.annotate(
        analysis_group=results.analysis_group.lower().replace("[^a-z0-9]+", "_").replace("_+$", "")
    )
    results = results.drop("v")

    # Add n_denovos to AC_case
    results = results.annotate(ac_case=hl.or_else(results.ac_case, 0) + hl.or_else(results.n_denovos, 0))
    results = results.annotate(af_case=hl.cond(results.an_case == 0, 0, results.ac_case / results.an_case))

    variants = variants.filter(hl.is_defined(results[variants.key]))

    analysis_groups = results.aggregate(hl.agg.collect_as_set(results.analysis_group))

    variants = variants.annotate(groups=hl.struct())
    for group in analysis_groups:
        group_results = results.filter(results.analysis_group == group).drop("analysis_group")
        variants = variants.annotate(groups=variants.groups.annotate(**{group: group_results[variants.key]}))

    # The latest (2019/04/15) SCHEMA dataset moved the source and in_analysis field from variant level to group level
    # in_analysis is the same for all groups within a variant, but source is not
    variants = variants.annotate(in_analysis=variants.groups.meta.in_analysis, source=variants.groups.meta.source)

    variants = variants.key_by().drop("locus", "alleles")
Example from macarthur-lab/gnomadjs — data/prepare_gene_models.py (view on GitHub)
hgnc.previous_symbols == "",
            hl.empty_array(hl.tstr),
            hgnc.previous_symbols.split(",").map(lambda s: s.strip()),
        ),
        synonyms=hl.cond(
            hgnc.synonyms == "", hl.empty_array(hl.tstr), hgnc.synonyms.split(",").map(lambda s: s.strip())
        ),
    )

    genes = genes.annotate(**hgnc[genes.gene_id])
    genes = genes.annotate(symbol_source=hl.cond(hl.is_defined(genes.symbol), "hgnc", hl.null(hl.tstr)))

    # If an HGNC gene symbol was not present, use the symbol from Gencode
    for gencode_version in all_gencode_versions:
        genes = genes.annotate(
            symbol=hl.or_else(genes.symbol, genes.gencode[f"v{gencode_version}"].gene_symbol),
            symbol_source=hl.cond(
                hl.is_missing(genes.symbol) & hl.is_defined(genes.gencode[f"v{gencode_version}"].gene_symbol),
                f"gencode (v{gencode_version})",
                genes.symbol_source,
            ),
        )

    # Collect all fields that can be used to search by gene name
    genes = genes.annotate(
        symbol_upper_case=genes.symbol.upper(),
        search_terms=hl.empty_array(hl.tstr).append(genes.symbol).extend(genes.synonyms).extend(genes.previous_symbols),
    )
    for gencode_version in all_gencode_versions:
        genes = genes.annotate(
            search_terms=hl.rbind(
                genes.gencode[f"v{gencode_version}"].gene_symbol,
Example from hail-is/hail — hail/python/hail/experimental/ld_score_regression.py (view on GitHub)
lambda i: hl.rbind(
                hl.int(hl.or_else(step1_indices[i], 0)),
                ht.__cols[i].__m_step1,
                ht.__entries[i],
                lambda step1_idx, m_step1, entry: hl.rbind(
                    hl.map(
                        lambda j: hl.int(hl.floor(j * (m_step1 / n_blocks))),
                        hl.range(0, n_blocks + 1)),
                    lambda step1_separators: hl.rbind(
                        hl.set(step1_separators).contains(step1_idx),
                        hl.sum(
                            hl.map(
                                lambda s1: step1_idx >= s1,
                                step1_separators)) - 1,
                        lambda is_separator, step1_block: entry.annotate(
                            __step1_block=step1_block,
                            __step2_block=hl.cond(~entry.__in_step1 & is_separator,
                                                  step1_block - 1,
Example from macarthur-lab/gnomadjs — data/data_utils/computed_fields/sorted_transcript_consequences_v2.py (view on GitHub)
lambda c: (
            hl.bind(
                lambda is_coding, is_most_severe, is_canonical: (
                    hl.cond(
                        is_coding,
                        hl.cond(is_most_severe, hl.cond(is_canonical, 1, 2), hl.cond(is_canonical, 3, 4)),
                        hl.cond(is_most_severe, hl.cond(is_canonical, 5, 6), hl.cond(is_canonical, 7, 8)),
                    )
                ),
                hl.or_else(c.biotype, "") == "protein_coding",
                hl.set(c.consequence_terms).contains(vep_root.most_severe_consequence),
                hl.or_else(c.canonical, 0) == 1,
            )
Example from hail-is/hail — hail/python/hail/methods/family_methods.py (view on GitHub)
t2 = tm.annotate_cols(
        errors=hl.agg.count(),
        snp_errors=hl.agg.count_where(hl.is_snp(tm.alleles[0], tm.alleles[1])))
    table2 = t2.key_cols_by().cols()
    table2 = table2.select(pat_id=table2.father[ck_name],
                           mat_id=table2.mother[ck_name],
                           fam_id=table2.fam_id,
                           errors=table2.errors,
                           snp_errors=table2.snp_errors)
    table2 = table2.group_by('pat_id', 'mat_id').aggregate(
        fam_id=hl.agg.take(table2.fam_id, 1)[0],
        children=hl.int32(hl.agg.count()),
        errors=hl.agg.sum(table2.errors),
        snp_errors=hl.agg.sum(table2.snp_errors))
    table2 = table2.annotate(errors=hl.or_else(table2.errors, hl.int64(0)),
                             snp_errors=hl.or_else(table2.snp_errors, hl.int64(0)))

    # in implicated, idx 0 is dad, idx 1 is mom, idx 2 is child
    implicated = hl.literal([
        [0, 0, 0],  # dummy
        [1, 1, 1],
        [1, 1, 1],
        [1, 0, 1],
        [0, 1, 1],
        [0, 0, 1],
        [1, 0, 1],
        [0, 1, 1],
        [0, 0, 1],
        [0, 1, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 0, 1],
Example from macarthur-lab/tx_annotation — tx_annotation.py (view on GitHub)
mt = mt.annotate_rows(**hl.sorted(mt.tx_annotation, key=lambda x: csq_order[
        (x.lof, hl.or_else(hl.is_missing(x.lof_flag), False), x.csq)])[0])
Example from hail-is/hail — hail/python/hail/methods/family_methods.py (view on GitHub)
[1, 1, 1],
        [1, 1, 1],
        [1, 0, 1],
        [0, 1, 1],
        [0, 0, 1],
        [1, 0, 1],
        [0, 1, 1],
        [0, 0, 1],
        [0, 1, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 0, 1],
    ], dtype=hl.tarray(hl.tarray(hl.tint64)))

    table3 = tm.annotate_cols(all_errors=hl.or_else(hl.agg.array_sum(implicated[tm.mendel_code]), [0, 0, 0]),
                              snp_errors=hl.or_else(
                                  hl.agg.filter(hl.is_snp(tm.alleles[0], tm.alleles[1]),
                                                hl.agg.array_sum(implicated[tm.mendel_code])),
                                  [0, 0, 0])).key_cols_by().cols()

    table3 = table3.select(xs=[
        hl.struct(**{ck_name: table3.father[ck_name],
                     'fam_id': table3.fam_id,
                     'errors': table3.all_errors[0],
                     'snp_errors': table3.snp_errors[0]}),
        hl.struct(**{ck_name: table3.mother[ck_name],
                     'fam_id': table3.fam_id,
                     'errors': table3.all_errors[1],
                     'snp_errors': table3.snp_errors[1]}),
        hl.struct(**{ck_name: table3.proband[ck_name],
                     'fam_id': table3.fam_id,
                     'errors': table3.all_errors[2],
Example from macarthur-lab/gnomadjs — data/gnomad_sv/prepare_gnomad_svs.py (view on GitHub)
# Group gene lists for all consequences in a struct
    ds = ds.annotate(
        consequences=hl.struct(
            **{
                csq.lower(): ds.info[f"PROTEIN_CODING__{csq}"]
                for csq in PROTEIN_CODING_CONSEQUENCES
                if csq != "INTERGENIC" and csq != "NEAREST_TSS"
            }
        )
    )
    ds = ds.annotate(intergenic=ds.info.PROTEIN_CODING__INTERGENIC)

    # Collect set of all genes for which a variant has a consequence
    all_genes = hl.empty_array(hl.tstr)
    for csq in ds.consequences.dtype.fields:
        all_genes = all_genes.extend(hl.or_else(ds.consequences[csq.lower()], hl.empty_array(hl.tstr)))
    ds = ds.annotate(genes=hl.set(all_genes))

    # Group per-population values in a struct for each field
    # ds = ds.annotate(**{field.lower(): expr_for_per_population_field(ds, field) for field in PER_POPULATION_FIELDS})
    ds = ds.annotate(
        freq=hl.struct(
            **dict(
                (
                    (
                        pop,
                        hl.struct(
                            **{field.lower(): ds.info[f"{pop.upper()}_{field}"] for field in PER_POPULATION_FIELDS}
                        ),
                    )
                    for pop in POPULATIONS
                ),