Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:param bi_allelic_only: If set, only bi-allelic sites are used for the computation
:param autosomes_only: If set, only autosomal intervals are used.
:param match: If set, returns all intervals in intervals_ht that overlap the locus in the input MT.
:return: Callrate MT
"""
logger.info("Computing call rate MatrixTable")
if len(intervals_ht.key) != 1 or not isinstance(
intervals_ht.key[0], hl.expr.IntervalExpression
):
logger.warning(
f"Call rate matrix computation expects `intervals_ht` with a key of type Interval. Found: {intervals_ht.key}"
)
if autosomes_only:
callrate_mt = filter_to_autosomes(mt)
if bi_allelic_only:
callrate_mt = callrate_mt.filter_rows(bi_allelic_expr(callrate_mt))
intervals_ht = intervals_ht.annotate(_interval_key=intervals_ht.key)
callrate_mt = callrate_mt.annotate_rows(
_interval_key=intervals_ht.index(
callrate_mt.locus, all_matches=match
)._interval_key
)
if match:
callrate_mt = callrate_mt.explode_rows("_interval_key")
callrate_mt = callrate_mt.filter_rows(
hl.is_defined(callrate_mt._interval_key.interval)
table will be excluded from the PCA.
The loadings Table returned also contains a `pca_af` annotation which is the allele frequency
used for PCA. This is useful to project other samples in the PC space.
:param qc_mt: Input QC MT
:param related_samples_to_drop: Optional table of related samples to drop
:param n_pcs: Number of PCs to compute
:param autosomes_only: Whether to run the analysis on autosomes only
:return: eigenvalues, scores and loadings
"""
unrelated_mt = qc_mt.persist()
if autosomes_only:
unrelated_mt = filter_to_autosomes(unrelated_mt)
if related_samples_to_drop:
unrelated_mt = qc_mt.filter_cols(
hl.is_missing(related_samples_to_drop[qc_mt.col_key])
)
pca_evals, pca_scores, pca_loadings = hl.hwe_normalized_pca(
unrelated_mt.GT, k=n_pcs, compute_loadings=True
)
pca_af_ht = unrelated_mt.annotate_rows(
pca_af=hl.agg.mean(unrelated_mt.GT.n_alt_alleles()) / 2
).rows()
pca_loadings = pca_loadings.annotate(
pca_af=pca_af_ht[pca_loadings.key].pca_af
) # TODO: Evaluate if needed to write results at this point if relateds or not
Default function to run `generate_trio_stats_expr` to get trio stats stratified by raw and adj
.. note::
Expects that `mt` is a trio matrix table that was annotated with adj; if dealing with
a sparse MT, `hl.experimental.densify` must be run first.
By default this pipeline function will filter `mt` to only autosomes and bi-allelic sites.
:param mt: A Trio Matrix Table returned from `hl.trio_matrix`. Must be dense
:param autosomes_only: If set, only autosomal sites are used.
:param bi_allelic_only: If set, only bi-allelic sites are used for the computation
:return: Table with trio stats
"""
if autosomes_only:
mt = filter_to_autosomes(mt)
if bi_allelic_only:
mt = mt.filter_rows(bi_allelic_expr(mt))
logger.info(f"Generating trio stats using {mt.count_cols()} trios.")
trio_adj = mt.proband_entry.adj & mt.father_entry.adj & mt.mother_entry.adj
ht = mt.select_rows(
**generate_trio_stats_expr(
mt,
transmitted_strata={"raw": True, "adj": trio_adj},
de_novo_strata={"raw": True, "adj": trio_adj},
ac_strata={"raw": True, "adj": trio_adj},
)
).rows()
return ht
.. note::
By default this pipeline function will filter `mt` to only autosomes and bi-allelic sites.
:param mt: Input Matrix table
:param relatedness_ht: Input relationship table
:param i_col: Column containing the 1st sample of the pair in the relationship table
:param j_col: Column containing the 2nd sample of the pair in the relationship table
:param relationship_col: Column containing the relationship for the sample pair as defined in this module constants.
:param autosomes_only: If set, only autosomal sites are used.
:param bi_allelic_only: If set, only bi-allelic sites are used for the computation
:return: A Table with the sibling shared variant counts
"""
if autosomes_only:
mt = filter_to_autosomes(mt)
if bi_allelic_only:
mt = mt.filter_rows(bi_allelic_expr(mt))
sib_ht = relatedness_ht.filter(relatedness_ht[relationship_col] == SIBLINGS)
s_to_keep = sib_ht.aggregate(
hl.agg.explode(
lambda s: hl.agg.collect_as_set(s), [sib_ht[i_col].s, sib_ht[j_col].s]
),
_localize=False,
)
mt = mt.filter_cols(s_to_keep.contains(mt.s))
if "adj" not in mt.entry:
mt = annotate_adj(mt)
sib_stats_ht = mt.select_rows(
**generate_sib_stats_expr(