How to use the hail.or_missing function in hail

To help you get started, we’ve selected a few hail examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hail-is / hail / hail / python / hail / experimental / import_gtf.py View on Github external
'f2': 'feature',
                    'f3': 'start',
                    'f4': 'end',
                    'f5': 'score',
                    'f6': 'strand',
                    'f7': 'frame',
                    'f8': 'attribute'})

    ht = ht.annotate(attribute=hl.dict(
        hl.map(lambda x: (x.split(' ')[0],
                          x.split(' ')[1].replace('"', '').replace(';$', '')),
               ht['attribute'].split('; '))))

    attributes = ht.aggregate(hl.agg.explode(lambda x: hl.agg.collect_as_set(x), ht['attribute'].keys()))

    ht = ht.transmute(**{x: hl.or_missing(ht['attribute'].contains(x),
                                          ht['attribute'][x])
                         for x in attributes if x})

    if reference_genome:
        if reference_genome == 'GRCh37':
            ht = ht.annotate(seqname=ht['seqname'].replace('^chr', ''))
        else:
            ht = ht.annotate(seqname=hl.case()
                                       .when(ht['seqname'].startswith('HLA'), ht['seqname'])
                                       .when(ht['seqname'].startswith('chrHLA'), ht['seqname'].replace('^chr', ''))
                                       .when(ht['seqname'].startswith('chr'), ht['seqname'])
                                       .default('chr' + ht['seqname']))
        if skip_invalid_contigs:
            valid_contigs = hl.literal(set(hl.get_reference(reference_genome).contigs))
            ht = ht.filter(valid_contigs.contains(ht['seqname']))
        ht = ht.transmute(interval=hl.locus_interval(ht['seqname'],
github macarthur-lab / gnomadjs / data / exac / export_exac_vcf_to_ht.py View on Github external
def split_position_start(position):
    return hl.or_missing(
        hl.is_defined(position),
        hl.bind(lambda start: hl.cond(start == "?", hl.null(hl.tint), hl.int(start)), position.split("-")[0]),
    )
github macarthur-lab / gnomadjs / data / gnomad_r2_1 / prepare_gnomad_mnvs_for_browser.py View on Github external
for snp in ["snp1", "snp2", "snp3"]
        ]
    )

    ds = ds.annotate(constituent_snv_ids=[ds.snp1, ds.snp2, ds.snp3])

    ds = ds.annotate(
        mnv_in_exome=ds.constituent_snvs.all(lambda s: hl.is_defined(s.exome)),
        mnv_in_genome=ds.constituent_snvs.all(lambda s: hl.is_defined(s.genome)),
    )

    ds = ds.transmute(
        n_individuals=ds.n_indv_tnv,
        ac=ds.AC_tnv,
        ac_hom=ds.n_tnv_hom,
        exome=hl.or_missing(
            ds.mnv_in_exome,
            hl.struct(
                n_individuals=ds.n_indv_tnv_ex, ac=ds.AC_tnv_ex, ac_hom=ds.n_tnv_hom_ex
            ),
        ),
        genome=hl.or_missing(
            ds.mnv_in_genome,
            hl.struct(
                n_individuals=ds.n_indv_tnv_gen,
                ac=ds.AC_tnv_gen,
                ac_hom=ds.n_tnv_hom_gen,
            ),
        ),
    )

    ds = ds.drop("AC_snp1", "AC_snp2", "AC_snp3")
github hail-is / hail / hail / python / hail / experimental / densify.py View on Github external
                lambda entry: hl.scan._prev_nonnull(hl.or_missing(hl.is_defined(entry.END), entry)),
                t.__entries),
github hail-is / hail / hail / python / hail / experimental / phase_by_transmission.py View on Github external
Returns phased genotype calls in the case of a diploid proband
        (autosomes, PAR regions of sex chromosomes or non-PAR regions of a female proband)

        :param LocusExpression locus: Locus in the trio MatrixTable
        :param ArrayExpression alleles: Alleles in the trio MatrixTable
        :param CallExpression proband_call: Input proband genotype call
        :param CallExpression father_call: Input father genotype call
        :param CallExpression mother_call: Input mother genotype call
        :return: Array containing: phased proband call, phased father call, phased mother call
        :rtype: ArrayExpression
        """

        proband_v = proband_call.one_hot_alleles(alleles)
        father_v = hl.cond(
            locus.in_x_nonpar() | locus.in_y_nonpar(),
            hl.or_missing(father_call.is_haploid(), hl.array([father_call.one_hot_alleles(alleles)])),
            call_to_one_hot_alleles_array(father_call, alleles)
        )
        mother_v = call_to_one_hot_alleles_array(mother_call, alleles)

        combinations = hl.flatmap(
            lambda f:
            hl.zip_with_index(mother_v)
                .filter(lambda m: m[1] + f[1] == proband_v)
                .map(lambda m: hl.struct(m=m[0], f=f[0])),
            hl.zip_with_index(father_v)
        )

        return (
            hl.or_missing(
                hl.is_defined(combinations) & (hl.len(combinations) == 1),
                hl.array([
github macarthur-lab / gnomad_hail / gnomad / utils / sparse_mt.py View on Github external
                    lambda x: hl.or_missing(  # 2.1 get the start position of blocks that overlap the current locus
                        (x[1] >= ht.locus.position) & (x[0].contig == ht.locus.contig),
                        x[0].position,
                    )
github hail-is / hail / hail / python / hail / experimental / phase_by_transmission.py View on Github external
"""
        Returns phased genotype calls in the case of a haploid proband in the non-PAR region of X

        :param CallExpression proband_call: Input proband genotype call
        :param CallExpression father_call: Input father genotype call
        :param CallExpression mother_call: Input mother genotype call
        :return: Array containing: phased proband call, phased father call, phased mother call
        :rtype: ArrayExpression
        """

        transmitted_allele = hl.zip_with_index(hl.array([mother_call[0], mother_call[1]])).find(lambda m: m[1] == proband_call[0])
        return hl.or_missing(
            hl.is_defined(transmitted_allele),
            hl.array([
                hl.call(proband_call[0], phased=True),
                hl.or_missing(father_call.is_haploid(), hl.call(father_call[0], phased=True)),
                phase_parent_call(mother_call, transmitted_allele[0])
            ])
github macarthur-lab / hail-elasticsearch-pipelines / hail_scripts / v02 / utils / hail_utils.py View on Github external
**{ref_contig.replace("chr", ""): ref_contig for ref_contig in ref.contigs if "chr" in ref_contig},
        **{f"chr{ref_contig}": ref_contig for ref_contig in ref.contigs if "chr" not in ref_contig}}

    mt = hl.import_vcf(
        vcf_path,
        reference_genome=f"GRCh{genome_version}",
        contig_recoding=contig_recoding,
        min_partitions=min_partitions,
        force_bgz=force_bgz,
        drop_samples=drop_samples,
        skip_invalid_loci=skip_invalid_loci)

    mt = mt.annotate_globals(sourceFilePath=vcf_path, genomeVersion=genome_version)

    mt = mt.annotate_rows(
        original_alt_alleles=hl.or_missing(hl.len(mt.alleles) > 2, get_expr_for_variant_ids(mt.locus, mt.alleles))
    )

    if split_multi_alleles:
        mt = hl.split_multi_hts(mt)
        mt = mt.key_rows_by(**hl.min_rep(mt.locus, mt.alleles))

    return mt
github macarthur-lab / gnomad_hail / gnomad / utils / annotations.py View on Github external
def get_alt_count(locus, gt, is_female):
        """
        Helper method to calculate alt allele count with sex info if present
        """
        if is_female is None:
            return hl.or_missing(locus.in_autosome(), gt.n_alt_alleles())
        return (
            hl.case()
                .when(locus.in_autosome_or_par(), gt.n_alt_alleles())
                .when(
                ~is_female & (locus.in_x_nonpar() | locus.in_y_nonpar()),
                hl.min(1, gt.n_alt_alleles()),
            )
                .when(is_female & locus.in_y_nonpar(), 0)
                .default(0)
        )
github macarthur-lab / gnomad_hail / gnomad / utils / sparse_mt.py View on Github external
lambda entry: hl.scan._prev_nonnull(  # 1. Keep a scan of the entries using _prev_nonnull
                        hl.or_missing(
                            hl.is_defined(
                                entry.END
                            ),  # Update the scan whenever a new ref block is encountered
                            hl.tuple(
                                [  # 1.1 keep the start (ht.locus) and end (entry.END) of each ref block
                                    ht.locus,
                                    entry.END,
                                ]