How to use the gnomad.resources.resource_utils.DataException exception in gnomad

To help you get started, we’ve selected a few gnomad examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github macarthur-lab / gnomad_hail / gnomad / utils / vep.py View on Github external
def vep_config_path(ref: str = "GRCh37"):
    """
    Look up the "vep_config" entry registered for a reference build.

    :param ref: Key into VEP_REFERENCE_DATA. Default is "GRCh37"
    :return: The "vep_config" value stored under `ref` in VEP_REFERENCE_DATA
    :raises DataException: If `ref` is not a key of VEP_REFERENCE_DATA
    """
    if ref in VEP_REFERENCE_DATA:
        return VEP_REFERENCE_DATA[ref]["vep_config"]

    # Unknown reference: list every supported key so the caller can pick one.
    supported_refs = ",".join(VEP_REFERENCE_DATA)
    raise DataException(f"Select reference as one of: {supported_refs}")
github macarthur-lab / gnomad_hail / gnomad / utils / vep.py View on Github external
def vep_context_ht_path(ref: str = "GRCh37"):
    """
    Look up the "all_possible" entry registered for a reference build.

    :param ref: Key into VEP_REFERENCE_DATA. Default is "GRCh37"
    :return: The "all_possible" value stored under `ref` in VEP_REFERENCE_DATA
    :raises DataException: If `ref` is not a key of VEP_REFERENCE_DATA
    """
    if ref in VEP_REFERENCE_DATA:
        return VEP_REFERENCE_DATA[ref]["all_possible"]

    # Unknown reference: list every supported key so the caller can pick one.
    supported_refs = ",".join(VEP_REFERENCE_DATA)
    raise DataException(f"Select reference as one of: {supported_refs}")
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / gnomad.py View on Github external
def liftover(data_type: str) -> VersionedTableResource:
    """
    Get the 38 liftover of gnomad v2.1.1

    :param data_type: One of "exomes" or "genomes"
    :return: Release Table
    """
    if data_type not in DATA_TYPES:
        raise DataException(f"{data_type} not in {DATA_TYPES}")

    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
        releases.remove("2.1")
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_liftover_data_path(data_type, release))
            for release in releases
        },
github macarthur-lab / gnomad_hail / gnomad / resources / grch38 / gnomad.py View on Github external
def coverage(data_type: str) -> VersionedTableResource:
    """
    Retrieves gnomAD's coverage table by data_type

    :param data_type: One of "exomes" or "genomes"
    :return: Coverage Table
    """
    if data_type not in DATA_TYPES:
        raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_coverage_ht_path(data_type, release))
            for release in releases
        },
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / gnomad.py View on Github external
def coverage(data_type: str) -> VersionedTableResource:
    """
    Retrieves gnomAD's coverage table by data_type

    :param data_type: One of "exomes" or "genomes"
    :return: Coverage Table
    """
    if data_type not in DATA_TYPES:
        raise DataException(f"{data_type} not in {DATA_TYPES}")

    if data_type == "exomes":
        current_release = "2.1"
        releases = EXOME_RELEASES
        releases.remove("2.1.1")
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_coverage_ht_path(data_type, release))
            for release in releases
        },
github macarthur-lab / gnomad_hail / gnomad / resources / grch38 / gnomad.py View on Github external
raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    if data_type == "exomes":
        if version is None:
            version = CURRENT_EXOME_RELEASE
        elif version not in EXOME_RELEASES:
            raise DataException(
                f"Version {version} of gnomAD exomes for GRCh38 does not exist"
            )
    else:
        if version is None:
            version = CURRENT_GENOME_RELEASE
        elif version not in GENOME_RELEASES:
            raise DataException(
                f"Version {version} of gnomAD genomes for GRCh38 does not exist"
            )

    return f"gs://gnomad-public/release/{version}/coverage/{data_type}/gnomad.{data_type}.r{version}.coverage.summary.tsv.bgz"
github macarthur-lab / gnomad_hail / gnomad / resources / grch38 / gnomad.py View on Github external
def public_release(data_type: str) -> VersionedTableResource:
    """
    Retrieves publicly released versioned table resource

    :param data_type: One of "exomes" or "genomes"
    :return: Release Table
    """

    if data_type not in DATA_TYPES:
        raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_release_ht_path(data_type, release))
            for release in releases
        },
github macarthur-lab / gnomad_hail / gnomad / utils / filtering.py View on Github external
:param sample_path: Path to a file with list of samples
    :param header: Whether file with samples has a header. Default is True
    :param table_key: Key to sample Table. Default is "s"
    :param sparse: Whether the MatrixTable is sparse. Default is False
    :param gt_expr: Name of field in MatrixTable containing genotype expression. Default is "GT"
    :return: MatrixTable subsetted to specified samples and their variants
    """
    sample_ht = hl.import_table(sample_path, no_header=not header, key=table_key)
    sample_count = sample_ht.count()
    missing_ht = sample_ht.anti_join(mt.cols())
    missing_ht_count = missing_ht.count()
    full_count = mt.count_cols()

    if missing_ht_count != 0:
        missing_samples = missing_ht.s.collect()
        raise DataException(
            f"Only {sample_count - missing_ht_count} out of {sample_count} "
            "subsetting-table IDs matched IDs in the MT.\n"
            f"IDs that aren't in the MT: {missing_samples}\n"
        )

    mt = mt.semi_join_cols(sample_ht)
    if sparse:
        mt = mt.filter_rows(
            hl.agg.any(mt[gt_expr].is_non_ref() | hl.is_defined(mt.END))
        )
    else:
        mt = mt.filter_rows(hl.agg.any(mt[gt_expr].is_non_ref()))

    logger.info(
        f"Finished subsetting samples. Kept {mt.count_cols()} "
        f"out of {full_count} samples in MT"
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / gnomad.py View on Github external
def public_release(data_type: str) -> VersionedTableResource:
    """
    Retrieves publicly released versioned table resource

    :param data_type: One of "exomes" or "genomes"
    :return: Release Table
    """

    if data_type not in DATA_TYPES:
        raise DataException(f"{data_type} not in {DATA_TYPES}")

    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_release_ht_path(data_type, release))
            for release in releases
        },