How to use hail - 10 common examples

To help you get started, we’ve selected a few hail examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hail-is / hail / python / hail / expr / tests.py View on Github external
def test_floor_division(self):
        a_int32 = hl.array([2, 4, 8, 16, hl.null(tint32)])
        a_int64 = a_int32.map(lambda x: hl.int64(x))
        a_float32 = a_int32.map(lambda x: hl.float32(x))
        a_float64 = a_int32.map(lambda x: hl.float64(x))
        int32_4s = hl.array([4, 4, 4, 4, hl.null(tint32)])
        int32_3s = hl.array([3, 3, 3, 3, hl.null(tint32)])
        int64_3 = hl.int64(3)
        int64_3s = int32_3s.map(lambda x: hl.int64(x))
        float32_3 = hl.float32(3)
        float32_3s = int32_3s.map(lambda x: hl.float32(x))
        float64_3 = hl.float64(3)
        float64_3s = int32_3s.map(lambda x: hl.float64(x))

        expected = [0, 1, 2, 5, None]
        expected_inv = [1, 0, 0, 0, None]

        self.check_expr(a_int32 // 3, expected, tarray(tint32))
github hail-is / hail / python / hail / expr / tests.py View on Github external
def test_floor_division(self):
        a_int32 = hl.array([2, 4, 8, 16, hl.null(tint32)])
        a_int64 = a_int32.map(lambda x: hl.int64(x))
        a_float32 = a_int32.map(lambda x: hl.float32(x))
        a_float64 = a_int32.map(lambda x: hl.float64(x))
        int32_4s = hl.array([4, 4, 4, 4, hl.null(tint32)])
        int32_3s = hl.array([3, 3, 3, 3, hl.null(tint32)])
        int64_3 = hl.int64(3)
        int64_3s = int32_3s.map(lambda x: hl.int64(x))
        float32_3 = hl.float32(3)
        float32_3s = int32_3s.map(lambda x: hl.float32(x))
        float64_3 = hl.float64(3)
        float64_3s = int32_3s.map(lambda x: hl.float64(x))

        expected = [0, 1, 2, 5, None]
        expected_inv = [1, 0, 0, 0, None]

        self.check_expr(a_int32 // 3, expected, tarray(tint32))
        self.check_expr(a_int64 // 3, expected, tarray(tint64))
        self.check_expr(a_float32 // 3, expected, tarray(tfloat32))
        self.check_expr(a_float64 // 3, expected, tarray(tfloat64))

        self.check_expr(3 // a_int32, expected_inv, tarray(tint32))
github hail-is / hail / hail / python / hail / experimental / vcf_combiner.py View on Github external
ht = ht.select()
    ht = ht.annotate(x=hl.scan.count())
    ht = ht.annotate(y=ht.x + 1)
    ht = ht.filter(ht.x // n != ht.y // n)
    ht = ht.select()
    ht = ht.annotate(start=hl.or_else(
        hl.scan._prev_nonnull(hl.locus_from_global_position(ht.locus.global_position() + 1,
                              reference_genome=reference_genome)),
        hl.locus_from_global_position(0, reference_genome=reference_genome)))
    ht = ht.key_by()
    ht = ht.select(interval=hl.interval(start=ht.start, end=ht.locus, includes_end=True))

    intervals = ht.aggregate(hl.agg.collect(ht.interval))

    last_st = hl.eval(
        hl.locus_from_global_position(hl.literal(intervals[-1].end).global_position() + 1,
                                      reference_genome=reference_genome))
    interval = hl.Interval(start=last_st, end=end, includes_end=True)
    intervals.append(interval)
    return intervals
github hail-is / hail / hail / python / hail / experimental / vcf_combiner.py View on Github external
end = hl.Locus(reference_genome.contigs[-1],
                   reference_genome.lengths[reference_genome.contigs[-1]])

    ht = ht.select()
    ht = ht.annotate(x=hl.scan.count())
    ht = ht.annotate(y=ht.x + 1)
    ht = ht.filter(ht.x // n != ht.y // n)
    ht = ht.select()
    ht = ht.annotate(start=hl.or_else(
        hl.scan._prev_nonnull(hl.locus_from_global_position(ht.locus.global_position() + 1,
                              reference_genome=reference_genome)),
        hl.locus_from_global_position(0, reference_genome=reference_genome)))
    ht = ht.key_by()
    ht = ht.select(interval=hl.interval(start=ht.start, end=ht.locus, includes_end=True))

    intervals = ht.aggregate(hl.agg.collect(ht.interval))

    last_st = hl.eval(
        hl.locus_from_global_position(hl.literal(intervals[-1].end).global_position() + 1,
                                      reference_genome=reference_genome))
    interval = hl.Interval(start=last_st, end=end, includes_end=True)
    intervals.append(interval)
    return intervals
github hail-is / hail / python / hail / expr / tests.py View on Github external
def make_case(x):
            x = hl.literal(x)
            return (hl.case()
                .when(x == 6, 'A')
                .when(x % 3 == 0, 'B')
                .when(x == 5, 'C')
                .when(x < 2, 'D')
                .or_missing())
github hail-is / hail / hail / python / cluster-tests / cluster-vep-check.py View on Github external
import hail as hl

GOLD_STD = 'gs://hail-common/vep/vep/vep_examplars/vep_no_csq_4dc19bc1b.mt/'
GOLD_STD_CSQ = 'gs://hail-common/vep/vep/vep_examplars/vep_csq_4dc19bc1b.mt/'

for path, csq in [(GOLD_STD, False), (GOLD_STD_CSQ, True)]:
    print(f"Checking 'hl.vep' replicates on '{path}'")
    expected = hl.read_matrix_table(path)
    actual = hl.vep(expected.rows().select(), 'gs://hail-common/vep/vep/vep85-loftee-gcloud-testing.json', csq=csq)
    actual._force_count()
    # vep_result_agrees = actual._same(expected)
github hail-is / hail / python / hail / tests.py View on Github external
from hail.expr.expression import ExpressionException

        mt = self.get_groupable_matrix()
        self.assertRaises(ExpressionException, mt.group_rows_by, mt['group1'] + 1)
        self.assertRaises(ExpressionException, mt.group_cols_by, mt['group1'])
        self.assertRaises(ExpressionException, mt.group_cols_by, mt['group3'] + 1)
        self.assertRaises(ExpressionException, mt.group_rows_by, mt['group3'])
        self.assertRaises(ExpressionException, mt.group_rows_by, group3=mt['group1'])
        self.assertRaises(ExpressionException, mt.group_cols_by, group1=mt['group3'])
        self.assertRaises(ExpressionException, mt.group_rows_by, foo=mt['group1'])
        self.assertRaises(ExpressionException, mt.group_cols_by, foo=mt['group3'])

        a = mt.group_rows_by(group5=(mt['group2']['a'] + 1))
        self.assertRaises(ExpressionException, a.aggregate, group3=hl.agg.sum(mt['c']))
        self.assertRaises(ExpressionException, a.aggregate, group5=hl.agg.sum(mt['c']))
        self.assertRaises(ExpressionException, a.aggregate, foo=hl.agg.sum(mt['c']))

        b = mt.group_cols_by(group5=(mt['group4']['a'] + 1))
        self.assertRaises(ExpressionException, b.aggregate, group1=hl.agg.sum(mt['c']))
        self.assertRaises(ExpressionException, b.aggregate, group5=hl.agg.sum(mt['c']))
        self.assertRaises(ExpressionException, b.aggregate, foo=hl.agg.sum(mt['c']))
github hail-is / hail / hail / python / hail / methods / qc.py View on Github external
bound_exprs = {}
    gq_dp_exprs = {}

    def has_field_of_type(name, dtype):
        return name in mt.entry and mt[name].dtype == dtype

    if has_field_of_type('DP', hl.tint32):
        gq_dp_exprs['dp_stats'] = hl.agg.stats(mt.DP).select('mean', 'stdev', 'min', 'max')

    if has_field_of_type('GQ', hl.tint32):
        gq_dp_exprs['gq_stats'] = hl.agg.stats(mt.GQ).select('mean', 'stdev', 'min', 'max')

    if not has_field_of_type('GT',  hl.tcall):
        raise ValueError(f"'variant_qc': expect an entry field 'GT' of type 'call'")

    bound_exprs['n_called'] = hl.agg.count_where(hl.is_defined(mt['GT']))
    bound_exprs['n_not_called'] = hl.agg.count_where(hl.is_missing(mt['GT']))
    bound_exprs['n_filtered'] = mt.count_cols(_localize=False) - hl.agg.count()
    bound_exprs['call_stats'] = hl.agg.call_stats(mt.GT, mt.alleles)

    result = hl.rbind(hl.struct(**bound_exprs),
                      lambda e1: hl.rbind(
                          hl.case().when(hl.len(mt.alleles) == 2,
                                         hl.hardy_weinberg_test(e1.call_stats.homozygote_count[0],
                                                                e1.call_stats.AC[1] - 2 *
                                                                e1.call_stats.homozygote_count[1],
                                                                e1.call_stats.homozygote_count[1])
                                         ).or_missing(),
                          lambda hwe: hl.struct(**{
                              **gq_dp_exprs,
                              **e1.call_stats,
                              'call_rate': hl.float(e1.n_called) / (e1.n_called + e1.n_not_called + e1.n_filtered),
github macarthur-lab / gnomadjs / data / exac / export_exac_vcf_to_ht.py View on Github external
),
                    ),
                    exon=annotation.EXON,
                    gene_id=annotation.Gene,
                    gene_symbol=annotation.SYMBOL,
                    gene_symbol_source=annotation.SYMBOL_SOURCE,
                    hgnc_id=annotation.HGNC_ID,
                    hgvsc=annotation.HGVSc,
                    hgvsp=annotation.HGVSp,
                    lof=annotation.LoF,
                    lof_filter=annotation.LoF_filter,
                    lof_flags=annotation.LoF_flags,
                    lof_info=annotation.LoF_info,
                    # PolyPhen field contains "polyphen_prediction(polyphen_score)"
                    polyphen_prediction=hl.or_missing(
                        hl.is_defined(annotation.PolyPhen), annotation.PolyPhen.split("\\(")[0]
                    ),
                    protein_id=annotation.ENSP,
                    # Protein_position may contain either "start-end" or, when start == end, "start"
                    protein_start=split_position_start(annotation.Protein_position),
                    protein_end=split_position_end(annotation.Protein_position),
                    # SIFT field contains "sift_prediction(sift_score)"
                    sift_prediction=hl.or_missing(hl.is_defined(annotation.SIFT), annotation.SIFT.split("\\(")[0]),
                    transcript_id=annotation.Feature,
                )
github macarthur-lab / gnomadjs / data / gnomad_r3 / prepare_gnomad_r3_variants.py View on Github external
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    parser.add_argument("output")
    args = parser.parse_args()

    hl.init(log="/tmp/hail.log")

    ds = hl.read_table(args.input)
    ds = format_variants_table(ds)

    ds.describe()

    ds.write(args.output)