How to use the gnomad.utils.gnomad_functions.logger function in gnomad

To help you get started, we’ve selected a few gnomad examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github macarthur-lab / gnomad_hail / gnomad / utils / sample_qc.py View on Github external
:param normal_ploidy_cutoff: Number of standard deviations to use when determining sex chromosome ploidy cutoffs for XX, XY karyotypes.
    :param aneuploidy_cutoff: Number of standard deviations to use when sex chromosome ploidy cutoffs for aneuploidies.
    :return: Tuple of ploidy cutoff tuples: ((x_ploidy_cutoffs), (y_ploidy_cutoffs))
    """
    # Group sex chromosome ploidy table by f_stat cutoff and get mean/stdev for chrX/Y ploidies
    sex_stats = ht.aggregate(
        hl.agg.group_by(
            hl.cond(ht.f_stat < f_stat_cutoff, "xx", "xy"),
            hl.struct(
                    x=hl.agg.stats(ht.chrX_ploidy),
                    y=hl.agg.stats(ht.chrY_ploidy)
            )
        )
    )
    logger.info(f"XX stats: {sex_stats['xx']}") 
    logger.info(f"XY stats: {sex_stats['xy']}")

    cutoffs = (
                (
                    sex_stats["xy"].x.mean + (normal_ploidy_cutoff * sex_stats["xy"].x.stdev),
                    (sex_stats["xx"].x.mean - (normal_ploidy_cutoff * sex_stats["xx"].x.stdev), sex_stats["xx"].x.mean + (normal_ploidy_cutoff * sex_stats["xx"].x.stdev)),
                    sex_stats["xx"].x.mean + (aneuploidy_cutoff * sex_stats["xx"].x.stdev)
                ),
                (
                    (sex_stats["xx"].y.mean + (normal_ploidy_cutoff * sex_stats["xx"].y.stdev), sex_stats["xy"].y.mean + (normal_ploidy_cutoff * sex_stats["xy"].y.stdev)), 
                    sex_stats["xy"].y.mean + (aneuploidy_cutoff * sex_stats["xy"].y.stdev)
                )
            )
 
    logger.info(f"X ploidy cutoffs: {cutoffs[0]}") 
    logger.info(f"Y ploidy cutoffs: {cutoffs[1]}")
    return cutoffs
github macarthur-lab / gnomad_hail / gnomad / utils / sample_qc.py View on Github external
filter_expr.append((mt.site_callrate > min_callrate))
    if min_inbreeding_coeff_threshold is not None:
        filter_expr.append((mt.site_inbreeding_coeff > min_inbreeding_coeff_threshold))
    if min_hardy_weinberg_threshold is not None:
        filter_expr.append((mt.hwe.p_value > min_hardy_weinberg_threshold))
    if snv_only:
        filter_expr.append(hl.is_snp(mt.alleles[0], mt.alleles[1]))
    if bi_allelic_only:
        filter_expr.append(bi_allelic_expr(mt))

    if apply_hard_filters:
        if 'info' in mt.row_value:
            if 'QD' in mt.info:
                filter_expr.append((mt.info.QD >= 2))
            else:
                logger.warn("Could not apply QD hard filter, as `info.QD` not found in schema.")
            if 'FS' in mt.info:
                filter_expr.append((mt.info.FS <= 60))
            else:
                logger.warn("Could not apply FS hard filter, as `info.FS` not found in schema.")
            if 'MQ' in mt.info:
                filter_expr.append((mt.info.MQ >= 30))
            else:
                logger.warn("Could not apply MQ hard filter, as `info.MQ` not found in schema.")
        else:
            logger.warn("Could not apply hard filters as `info` not found in schema.")

    return mt.filter_rows(functools.reduce(operator.iand, filter_expr))
github macarthur-lab / gnomad_hail / gnomad / utils / sample_qc.py View on Github external
:param min_af: Minimum allele frequency to keep. Not applied if set to ``None``.
    :param min_callrate: Minimum call rate to keep. Not applied if set to ``None``.
    :param min_inbreeding_coeff_threshold: Minimum site inbreeding coefficient to keep. Not applied if set to ``None``.
    :param min_hardy_weinberg_threshold: Minimum site HW test p-value to keep. Not applied if set to ``None``.
    :param apply_hard_filters: Whether to apply standard GAKT default site hard filters: QD >= 2, FS <= 60 and MQ >= 30
    :param ld_r2: Minimum r2 to keep when LD-pruning (set to `None` for no LD pruning)
    :param filter_lcr: Filter LCR regions
    :param filter_decoy: Filter decoy regions
    :param filter_segdup: Filter segmental duplication regions
    :param filter_exome_low_coverage_regions: If set, only high coverage exome regions (computed from gnomAD are kept)
    :param high_conf_regions: If given, the data will be filtered to only include variants in those regions
    :return: Filtered MT
    """
    logger.info("Creating QC MatrixTable")
    if ld_r2 is not None:
        logger.warn("The LD-prune step of this function requires non-preemptible workers only!")

    qc_mt = filter_low_conf_regions(
        mt,
        filter_lcr=filter_lcr,
        filter_decoy=filter_decoy,
        filter_segdup=filter_segdup,
        filter_exome_low_coverage_regions=filter_exome_low_coverage_regions, high_conf_regions=high_conf_regions
    )

    if adj_only:
        qc_mt = filter_to_adj(qc_mt)  # TODO: Make sure that this works fine before call rate filtering

    qc_mt = filter_rows_for_qc(
        qc_mt,
        min_af,
        min_callrate,