How to use the hail.len function in hail

To help you get started, we've selected a few hl.len examples based on popular ways the function is used in public projects.

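Before the project snippets, here is a minimal, self-contained sketch of what hl.len does (the table and field names are illustrative, not taken from any project below). hl.len returns the length of a string, array, set, or dict expression:

import hail as hl

hl.init()

# A toy table with one array field.
t = hl.Table.parallelize(
    [{'alleles': ['A', 'T', 'G']}, {'alleles': ['C']}],
    hl.tstruct(alleles=hl.tarray(hl.tstr)),
)

# hl.len gives the number of elements in each row's array.
t = t.annotate(n_alleles=hl.len(t.alleles))
t.show()  # n_alleles: 3, then 1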

github macarthur-lab / hail-elasticsearch-pipelines / hail_scripts / v02 / utils / computed_fields / variant_id.py
def get_expr_for_end_pos(table):
    # End position = start position + length of the reference allele - 1.
    return table.locus.position + hl.len(get_expr_for_ref_allele(table)) - 1
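Here hl.len measures the length of the reference allele string to turn a start position into an end position. A minimal usage sketch, assuming (as elsewhere in this module) that the reference allele is alleles[0]:

# Hypothetical table `t` with standard `locus` and `alleles` fields.
t = t.annotate(end_pos=t.locus.position + hl.len(t.alleles[0]) - 1)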
github hail-is / hail / python / hail / methods / statgen.py
        # Recompute PL for the new allele indexing: each new genotype's PL is the
        # minimum PL over the old genotypes that map onto it.
        # (tarray / tint32 are type constructors imported by the surrounding module.)
        newPL = hl.cond(
            hl.is_defined(mt.PL),
            (hl.range(0, hl.triangle(hl.len(mt.alleles)))
             .map(lambda newi: hl.min(
                 hl.range(0, hl.triangle(hl.len(mt.old_alleles)))
                 .filter(lambda oldi: hl.bind(
                     lambda oldc: hl.call(mt.__old_to_new_no_na[oldc[0]],
                                          mt.__old_to_new_no_na[oldc[1]])
                     == hl.unphased_diploid_gt_index_call(newi),
                     hl.unphased_diploid_gt_index_call(oldi)))
                 .map(lambda oldi: mt.PL[oldi])))),
            hl.null(tarray(tint32)))
        return mt.annotate_entries(
            # Remap the genotype call onto the new allele indices.
            GT=hl.call(mt.__old_to_new_no_na[mt.GT[0]],
                       mt.__old_to_new_no_na[mt.GT[1]]),
            # Sum allele depths of old alleles that collapse onto the same new allele.
            AD=hl.cond(
                hl.is_defined(mt.AD),
                (hl.range(0, hl.len(mt.alleles))
                 .map(lambda newi: hl.sum(hl.range(0, hl.len(mt.old_alleles))
                                          .filter(lambda oldi: mt.__old_to_new_no_na[oldi] == newi)
                                          .map(lambda oldi: mt.AD[oldi])))),
                hl.null(tarray(tint32))),
            # DP unchanged
            GQ=hl.gq_from_pl(newPL),
            PL=newPL).drop('__old_to_new_no_na')
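In this snippet, hl.triangle(hl.len(mt.alleles)) is the number of unphased diploid genotypes for n alleles, n * (n + 1) / 2, which is exactly the expected length of the PL array. A quick sanity check:

hl.eval(hl.triangle(hl.len(hl.literal(['A', 'T', 'G']))))  # 6 genotypes for 3 alleles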
github hail-is / hail / hail / python / hail / expr / expressions / typed_expressions.py
def _summary_aggs(self):
    # Summarize a collection field: aggregate the min / max / mean element count,
    # then recurse into per-element summaries.
    length = hl.len(self)
    return hl.tuple((
        hl.agg.min(length),
        hl.agg.max(length),
        hl.agg.mean(length),
        hl.agg.explode(lambda elt: elt._all_summary_aggs(), self)))
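The same length aggregations work directly in user code; for example, summarizing array lengths over the toy table from the first sketch:

t.aggregate(hl.agg.stats(hl.len(t.alleles)))  # n, mean, min, max, ... of the lengths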
github macarthur-lab / gnomad_hail / gnomad / utils / vep.py
find_worst_transcript_consequence
    ).values()
    sorted_canonical_scores = hl.sorted(
        worst_csq_gene_canonical, key=lambda tc: tc.csq_score
    )

    vep_data = mt[vep_root].annotate(
        transcript_consequences=transcript_csqs,
        worst_consequence_term=csqs.find(
            lambda c: transcript_csqs.map(
                lambda csq: csq.most_severe_consequence
            ).contains(c)
        ),
        worst_csq_by_gene=sorted_scores,
        worst_csq_for_variant=hl.or_missing(
            hl.len(sorted_scores) > 0, sorted_scores[0]
        ),
        worst_csq_by_gene_canonical=sorted_canonical_scores,
        worst_csq_for_variant_canonical=hl.or_missing(
            hl.len(sorted_canonical_scores) > 0, sorted_canonical_scores[0]
        ),
    )

    return (
        mt.annotate_rows(**{vep_root: vep_data})
        if isinstance(mt, hl.MatrixTable)
        else mt.annotate(**{vep_root: vep_data})
    )
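The hl.or_missing(hl.len(xs) > 0, xs[0]) pattern used twice above is a safe "first element": it yields missing rather than raising an out-of-bounds error when the sorted array is empty. For example:

empty = hl.literal([], dtype='array<int32>')
hl.eval(hl.or_missing(hl.len(empty) > 0, empty[0]))  # None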
github hail-is / hail / hail / python / hail / experimental / vcf_combiner.py
lambda combined_allele_index:
                        hl.range(0, hl.len(row.data)).flatmap(
                            lambda i:
                            hl.cond(hl.is_missing(row.data[i].__entries),
                                    hl.range(0, hl.len(gbl.g[i].__cols))
                                      .map(lambda _: hl.null(row.data[i].__entries.dtype.element_type)),
                                    hl.bind(
                                        lambda old_to_new: row.data[i].__entries.map(
                                            lambda e: renumber_entry(e, old_to_new)),
                                        hl.range(0, hl.len(alleles.local[i])).map(
                                            lambda j: combined_allele_index[alleles.local[i][j]])))),
                        hl.dict(hl.range(0, hl.len(alleles.globl)).map(
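hl.range(0, hl.len(a)).map(...) is the combiner's idiom for index-based iteration over an array. The same idiom builds a value-to-index dict, as in the truncated hl.dict(...) line above (illustrative values):

a = hl.literal(['A', 'T', 'G'])
hl.eval(hl.dict(hl.range(0, hl.len(a)).map(lambda i: hl.tuple([a[i], i]))))
# {'A': 0, 'T': 1, 'G': 2}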
github macarthur-lab / gnomad_hail / gnomad / utils / annotations.py
)

    # If a sample is in sib_ht more than one time, keep only one of the sibling pairs
    # First filter to only samples found in mt to keep as many pairs as possible
    s_to_keep = mt.aggregate_cols(hl.agg.collect_as_set(mt.s), _localize=False)
    sib_ht = sib_ht.filter(
        s_to_keep.contains(sib_ht[i_col].s) & s_to_keep.contains(sib_ht[j_col].s)
    )
    sib_ht = sib_ht.add_index("sib_idx")
    sib_ht = sib_ht.annotate(sibs=[sib_ht[i_col].s, sib_ht[j_col].s])
    sib_ht = sib_ht.explode("sibs")
    sib_ht = sib_ht.group_by("sibs").aggregate(
        sib_idx=(hl.agg.take(sib_ht.sib_idx, 1, ordering=sib_ht.sib_idx)[0])
    )
    sib_ht = sib_ht.group_by(sib_ht.sib_idx).aggregate(sibs=hl.agg.collect(sib_ht.sibs))
    sib_ht = sib_ht.filter(hl.len(sib_ht.sibs) == 2).persist()

    logger.info(
        f"Generating sibling variant sharing counts using {sib_ht.count()} pairs."
    )
    sib_ht = sib_ht.explode("sibs").key_by("sibs")[mt.s]

    # Create sibling sharing counters
    sib_stats = hl.struct(
        **{
            f"n_sib_shared_variants_{name}": hl.sum(
                hl.agg.filter(
                    expr,
                    hl.agg.group_by(
                        sib_ht.sib_idx,
                        hl.or_missing(
                            hl.agg.sum(hl.is_defined(mt.GT)) == 2,
github macarthur-lab / gnomad_hail / utils / generic.py
nr: A dict of sample_num -> genotype index in concordance table for all non-ref samples

        :param mt:
        :param common_samples:
        :return:
        """
        ht = mt.select_rows(
            nr=hl.dict(
                hl.agg.collect(
                    hl.agg.filter(
                        common_samples.contains(mt.s) & hl.or_else(mt.GT.is_non_ref(), True),
                        hl.tuple([common_samples[mt.s], hl.or_else(mt.GT.n_alt_alleles() + 2, 1)])
                    )
                )
            )
        ).rows()

        if filter_monomorphic:
            ht = ht.filter(hl.len(ht.nr) > 0)

        return ht
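hl.len works on dicts as well as arrays, which is what the filter_monomorphic branch relies on: rows are kept only when the nr dict is non-empty. For example:

hl.eval(hl.len(hl.literal({'s1': 2, 's2': 3})))  # 2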
github hail-is / hail / hail / python / hail / experimental / vcf_combiner.py
lambda row: hl.rbind(
                hl.len(row.alleles), '<NON_REF>' == row.alleles[-1],
                lambda alleles_len, has_non_ref: hl.struct(
                    locus=row.locus,
                    alleles=hl.cond(has_non_ref, row.alleles[:-1], row.alleles),
                    rsid=row.rsid,
                    __entries=row.__entries.map(
                        lambda e:
                        hl.struct(
                            DP=e.DP,
                            END=row.info.END,
                            GQ=e.GQ,
                            LA=hl.range(0, alleles_len - hl.cond(has_non_ref, 1, 0)),
                            LAD=hl.cond(has_non_ref, e.AD[:-1], e.AD),
                            LGT=e.GT,
                            LPGT=e.PGT,
                            LPL=hl.cond(has_non_ref,
                                        hl.cond(alleles_len > 2,
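The hl.rbind call at the top of this snippet binds hl.len(row.alleles) once so the allele count is computed a single time and reused across the struct fields. A minimal sketch of the same binding pattern:

hl.eval(hl.rbind(hl.len(hl.literal(['A', 'T', '<NON_REF>'])), lambda n: n - 1))  # 2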