How to use the hail.literal function in hail

To help you get started, we’ve selected a few hail examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hail-is / hail / hail / python / hail / experimental / vcf_combiner.py View on Github external
ht = ht.select()
    ht = ht.annotate(x=hl.scan.count())
    ht = ht.annotate(y=ht.x + 1)
    ht = ht.filter(ht.x // n != ht.y // n)
    ht = ht.select()
    ht = ht.annotate(start=hl.or_else(
        hl.scan._prev_nonnull(hl.locus_from_global_position(ht.locus.global_position() + 1,
                              reference_genome=reference_genome)),
        hl.locus_from_global_position(0, reference_genome=reference_genome)))
    ht = ht.key_by()
    ht = ht.select(interval=hl.interval(start=ht.start, end=ht.locus, includes_end=True))

    intervals = ht.aggregate(hl.agg.collect(ht.interval))

    last_st = hl.eval(
        hl.locus_from_global_position(hl.literal(intervals[-1].end).global_position() + 1,
                                      reference_genome=reference_genome))
    interval = hl.Interval(start=last_st, end=end, includes_end=True)
    intervals.append(interval)
    return intervals
github hail-is / hail / hail / python / hail / methods / impex.py View on Github external
:class:`.Table`
        Interval-keyed table.
    """

    # UCSC BED spec defined here: https://genome.ucsc.edu/FAQ/FAQformat.html#format1

    t = import_table(path, no_header=True, delimiter=r"\s+", impute=False,
                     skip_blank_lines=True, types={'f0': tstr, 'f1': tint32,
                                                   'f2': tint32, 'f3': tstr,
                                                   'f4': tstr},
                     comment=["""^browser.*""", """^track.*""",
                              r"""^\w+=("[\w\d ]+"|\d+).*"""],
                     **kwargs)

    if contig_recoding is not None:
        contig_recoding = hl.literal(contig_recoding)

    def recode_contig(x):
        if contig_recoding is None:
            return x
        return contig_recoding.get(x, x)

    if t.row.dtype == tstruct(f0=tstr, f1=tint32, f2=tint32):
        t = t.select(interval=locus_interval_expr(recode_contig(t['f0']),
                                                  t['f1'] + 1,
                                                  t['f2'] + 1,
                                                  True,
                                                  False,
                                                  reference_genome,
                                                  skip_invalid_intervals))

    elif len(t.row) >= 4 and tstruct(**dict([(n, typ) for n, typ in t.row.dtype._field_types.items()][:4])) == tstruct(f0=tstr, f1=tint32, f2=tint32, f3=tstr):
github hail-is / hail / hail / python / hail / plot / plots.py View on Github external
x_range = (ranges.x_stats.min, ranges.x_stats.max)
        if y_range is None:
            y_range = (ranges.y_stats.min, ranges.y_stats.max)
    else:
        warnings.warn('If x_range or y_range are specified in histogram_2d, and there are points '
                      'outside of these ranges, they will not be plotted')
    x_range = list(map(float, x_range))
    y_range = list(map(float, y_range))
    x_spacing = (x_range[1] - x_range[0]) / x_bins
    y_spacing = (y_range[1] - y_range[0]) / y_bins

    def frange(start, stop, step):
        from itertools import count, takewhile
        return takewhile(lambda x: x <= stop, count(start, step))

    x_levels = hail.literal(list(frange(x_range[0], x_range[1], x_spacing))[::-1])
    y_levels = hail.literal(list(frange(y_range[0], y_range[1], y_spacing))[::-1])

    grouped_ht = source.group_by(
        x=hail.str(x_levels.find(lambda w: x >= w)),
        y=hail.str(y_levels.find(lambda w: y >= w))
    ).aggregate(c=hail.agg.count())
    data = grouped_ht.filter(hail.is_defined(grouped_ht.x) & (grouped_ht.x != str(x_range[1])) &
                             hail.is_defined(grouped_ht.y) & (grouped_ht.y != str(y_range[1]))).to_pandas()

    # Use python prettier float -> str function
    data['x'] = data['x'].apply(lambda e: str(float(e)))
    data['y'] = data['y'].apply(lambda e: str(float(e)))

    if log:
        mapper = LogColorMapper(palette=colors, low=data.c.min(), high=data.c.max())
    else:
github hail-is / hail / hail / python / hail / expr / functions.py View on Github external
>>> hl.eval(hl.is_strand_ambiguous('A', 'T'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    alleles = hl.literal({('A', 'T'), ('T', 'A'), ('G', 'C'), ('C', 'G')})
    return alleles.contains((ref, alt))
github hail-is / hail / hail / python / hail / utils / interval.py View on Github external
True

        >>> interval2.contains(6)
        False

        Parameters
        ----------
        value :
            Object with type :meth:`.point_type`.

        Returns
        -------
        :obj:`bool`
        """

        return hl.eval(hl.literal(self, hl.tinterval(self._point_type)).contains(value))
github hail-is / hail / hail / python / hail / experimental / ldscsim.py View on Github external
if cov_matrix != None:
        n_phens = cov_matrix.shape[0]
    else:
        n_phens = len(h2)
        if rg == [None]:
            print(f'Assuming rg=0 for all {n_phens} traits')
            rg = [0]*int((n_phens**2-n_phens)/2)
        assert (all(x >= -1 and x<= 1 for x in rg)), 'rg values must be between 0 and 1'
        cov, rg = get_cov_matrix(h2, rg)
    cov = (1/M)*cov
    randstate = np.random.RandomState(int(seed)) #seed random state for replicability
    betas = randstate.multivariate_normal(mean=np.zeros(n_phens),cov=cov,size=[M,])
    df = pd.DataFrame([0]*M,columns=['beta'])
    tb = hl.Table.from_pandas(df)
    tb = tb.add_index().key_by('idx')
    tb = tb.annotate(beta = hl.literal(betas.tolist())[hl.int32(tb.idx)])
    mt = mt.add_row_index(name='row_idx'+tid)
    mt = mt.annotate_rows(beta = tb[mt['row_idx'+tid]]['beta'])
    mt = _clean_fields(mt, tid)
    return mt, rg
github hail-is / hail / hail / python / hail / experimental / ldscsim.py View on Github external
cov_matrix = np.asarray([[1/(ptt+ptf), rg/ptt],[rg/ptt,1/(ptt+pft)]])
        pff0, pff = pff, 1-sum([ptt, ptf, pft])
        print(f'rg: {rg0} -> {rg}\nptt: {ptt0} -> {ptt}\npff: {pff0} -> {pff}')
        pi = [ptt, ptf, pft, pff]
    beta = randstate.multivariate_normal(mean=np.zeros(2),cov=cov_matrix,size=[int(M),])
    zeros = np.zeros(shape=int(M)).T
    beta_matrix = np.stack((beta, np.asarray([beta[:,0],zeros]).T,
                            np.asarray([zeros,zeros]).T,np.asarray([zeros,beta[:,1]]).T),axis=1)
    idx = np.random.choice([0,1,2,3],p=pi,size=int(M))
    betas = beta_matrix[range(int(M)),idx,:]
    betas[:,0] *= (h2[0]/M)**(1/2)
    betas[:,1] *= (h2[1]/M)**(1/2)
    df = pd.DataFrame([0]*M,columns=['beta'])
    tb = hl.Table.from_pandas(df)
    tb = tb.add_index().key_by('idx')
    tb = tb.annotate(beta = hl.literal(betas.tolist())[hl.int32(tb.idx)])
    mt = mt.add_row_index()
    mt = mt.annotate_rows(beta = tb[mt.row_idx]['beta'])
    return mt, pi, rg
github hail-is / hail / hail / python / hail / expr / functions.py View on Github external
[0.9090909090082644, 0.09090909090082644, 9.090909090082645e-11]

    Notes
    -----
    This function assumes a uniform prior on the possible genotypes.

    Parameters
    ----------
    pl : :class:`.ArrayNumericExpression` of type :py:data:`.tint32`
        Array of Phred-scaled genotype likelihoods.

    Returns
    -------
   :class:`.ArrayNumericExpression` of type :py:data:`.tfloat64`
    """
    phred_table = hl.literal([10 ** (-x/10.0) for x in builtins.range(_cache_size)])
    gp = hl.bind(lambda pls: pls.map(lambda x: hl.cond(x >= _cache_size, 10 ** (-x/10.0), phred_table[x])), pl)
    return hl.bind(lambda gp: gp / hl.sum(gp), gp)
github macarthur-lab / tx_annotation / tx_annotation.py View on Github external
def filter_clinvar_to_gene_list(mt_kt, genes, gene_column_in_mt_kt):

    gene_names = hl.literal(genes)

    mt_kt = mt_kt.annotate(
        in_gene_of_interest=gene_names.find(lambda x: mt_kt[gene_column_in_mt_kt] == x))

    mt_kt = mt_kt.filter(mt_kt.in_gene_of_interest != "NA")

    return mt_kt
github hail-is / hail / hail / python / hail / experimental / ldscsim.py View on Github external
Returns
    -------
    :class:`.MatrixTable`
        :class:`.MatrixTable` with simulated phenotype as column field.
    """
    h2 = h2.tolist() if type(h2) is np.ndarray else ([h2] if type(h2) is not list else h2) 
    assert popstrat_var is None or (popstrat_var >= 0), 'popstrat_var must be non-negative'
    tid = ''.join(random.choices(string.ascii_uppercase + string.ascii_lowercase, k=5)) # "temporary id" -- random string to identify temporary intermediate fields generated by this method
    mt = annotate_all(mt=mt,
                      row_exprs={'beta_'+tid:beta},
                      col_exprs={} if popstrat is None else {'popstrat_'+tid:popstrat},
                      entry_exprs={'gt_'+tid:genotype.n_alt_alleles() if genotype.dtype is dtype('call') else genotype})
    mt = normalize_genotypes(mt['gt_'+tid])
    if mt['beta_'+tid].dtype == dtype('array'): #if >1 traits
        mt = mt.annotate_cols(y_no_noise = hl.agg.array_agg(lambda beta: hl.agg.sum(beta*mt['norm_gt']),mt['beta_'+tid]))
        mt = mt.annotate_cols(y = mt.y_no_noise + hl.literal(h2).map(lambda x: hl.rand_norm(0,hl.sqrt(1-x))))
    else:
        mt = mt.annotate_cols(y_no_noise = hl.agg.sum(mt['beta_'+tid] * mt['norm_gt']))
        mt = mt.annotate_cols(y = mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1-h2[0])))
    if popstrat is not None:
        var_factor = 1 if popstrat_var is None else (popstrat_var**(1/2))/mt.aggregate_cols(hl.agg.stats(mt['popstrat_'+tid])).stdev
        mt = mt.rename({'y':'y_no_popstrat'})
        mt = mt.annotate_cols(y = mt.y_no_popstrat + mt['popstrat_'+tid]*var_factor)
    mt = _clean_fields(mt, tid)
    return mt