How to use gnomad - 10 common examples

To help you get started, we’ve selected a few gnomad examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github macarthur-lab / gnomad_hail / gnomad / utils / liftover.py View on Github external
def main(args):
    """
    Load the dataset to lift over and make sure its multi-allelic sites are split.

    The input is either a gnomAD public release table (``args.gnomad`` combined
    with ``args.exomes`` or ``args.genomes``) or a user-supplied Hail Table /
    MatrixTable (``args.ht`` / ``args.mt``).

    :param args: Parsed command-line arguments. Reads ``gnomad``, ``exomes``,
        ``genomes``, ``ht`` and ``mt``.
    :raises ValueError: If the arguments do not identify an input dataset.
    """
    hl.init(log='/liftover.log')

    if args.gnomad:
        gnomad = True
        path = None

        # Genomes takes precedence when both flags are set.
        if args.genomes:
            data_type = 'genomes'
        elif args.exomes:
            data_type = 'exomes'
        else:
            # Fail early with a clear message instead of hitting an unbound
            # `data_type` in the logging call below.
            raise ValueError(
                'args.gnomad is set but neither args.exomes nor args.genomes was specified'
            )

        logger.info('Working on gnomAD %s release ht', data_type)
        logger.info('Reading in release ht')
        t = public_release(data_type).ht()
        logger.info('Variants in release ht: %s', t.count())

    else:
        data_type = None
        gnomad = False

        # The MatrixTable takes precedence when both inputs are supplied.
        if args.mt:
            path = args.mt
            t = hl.read_matrix_table(path)
        elif args.ht:
            path = args.ht
            t = hl.read_table(path)
        else:
            # Without an input there is nothing to lift over; `t` would be unbound below.
            raise ValueError(
                'No input specified: provide args.ht or args.mt, or set args.gnomad'
            )

    logger.info('Checking if input data has been split')
    if 'was_split' not in t.row:
        # Tables go through split_multi, MatrixTables through split_multi_hts.
        t = hl.split_multi(t) if isinstance(t, hl.Table) else hl.split_multi_hts(t)
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / reference_data.py View on Github external
import_func=hl.import_vcf,
            import_args={
                "path": "gs://genomics-public-data/1000-genomes-phase-3/vcf-20150220/ALL.chr*.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf",
                "force_bgz": True,
                "skip_invalid_loci": True,
                "min_partitions": 300,
                "reference_genome": "GRCh37",
            },
        ),
    },
)

# Versioned table resource for the 1000 Genomes ("kgp") high-confidence SNP sites.
# "phase_1_hc" is the only declared version and is also the default one.
# NOTE(review): import_func/import_args presumably describe how the table at
# `path` is built from the source VCF — confirm against TableResource.
kgp = VersionedTableResource(
    default_version="phase_1_hc",
    versions={
        "phase_1_hc": TableResource(
            path="gs://gnomad-public/resources/grch37/kgp/1000G_phase1.snps.high_confidence.b37.ht",
            import_func=import_sites_vcf,
            import_args={
                "path": "gs://gnomad-public/resources/grch37/kgp/1000G_phase1.snps.high_confidence.b37.vcf.bgz",
                "force_bgz": True,
                "skip_invalid_loci": True,
                "min_partitions": 100,
                "reference_genome": "GRCh37",
            },
        ),
    },
)

# Table resource for the `cpg_sites` table; only the table path is declared
# (no import function or import arguments).
cpg_sites = TableResource(path="gs://gnomad-public/resources/grch37/cpg_sites/cpg.ht")

methylation_sites = TableResource(
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / reference_data.py View on Github external
import_args={
        "path": "gs://gnomad-public/resources/grch37/decoy_intervals/mm-2-merged.GRCh37_compliant.bed",
        "reference_genome": "GRCh37",
    },
)

# GRCh37 interval resources. Each one pairs a Hail table path (`.ht`) with the
# importer and source file declared in import_func / import_args.
# NOTE(review): presumably the importer is what generates the `.ht` from the
# source file — confirm against TableResource.

# Purcell 5k intervals, sourced from an `.interval_list` file via hl.import_locus_intervals.
purcell_5k_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/purcell_5k_intervals/purcell5k.ht",
    import_func=hl.import_locus_intervals,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/purcell_5k_intervals/purcell5k.interval_list",
        "reference_genome": "GRCh37",
    },
)

# Segmental duplication intervals, sourced from a BED file via hl.import_bed.
seg_dup_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/seg_dup_intervals/hg19_self_chain_split_both.ht",
    import_func=hl.import_bed,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/seg_dup_intervals/hg19_self_chain_split_both.bed",
        "reference_genome": "GRCh37",
    },
)

# Exome high-coverage intervals, sourced from an `.interval_list` file via hl.import_locus_intervals.
exome_hc_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/broad_intervals/exomes_high_coverage.auto.interval_list.ht",
    import_func=hl.import_locus_intervals,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/broad_intervals/exomes_high_coverage.auto.interval_list",
        "reference_genome": "GRCh37",
    },
)
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / reference_data.py View on Github external
"reference_genome": "GRCh37",
    },
)

# 1000 Genomes Omni 2.5 sites table; the source is a block-gzipped VCF
# (`.vcf.bgz`) imported with import_sites_vcf into at least 100 partitions.
kgp_omni = TableResource(
    path="gs://gnomad-public/resources/grch37/kgp/1000G_omni2.5.b37.ht",
    import_func=import_sites_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/kgp/1000G_omni2.5.b37.vcf.bgz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)

# Mills and 1000G gold-standard indels sites table; same import settings as
# kgp_omni, with a different source VCF.
mills = TableResource(
    path="gs://gnomad-public/resources/grch37/mills/Mills_and_1000G_gold_standard.indels.b37.ht",
    import_func=import_sites_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/mills/Mills_and_1000G_gold_standard.indels.b37.vcf.bgz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)

syndip = MatrixTableResource(
    path="gs://gnomad-public/resources/grch37/syndip/hybrid.m37m.mt",
    import_func=hl.import_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/syndip/hybrid.m37m.vcf.bgz",
        "min_partitions": 100,
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / gnomad_ld.py View on Github external
def ld_index(pop: str) -> TableResource:
    """
    Build the table resource for the genomes LD index of a population.

    :param pop: Population identifier, forwarded to ``_ld_index_path``.
    :return: ``TableResource`` pointing at ``_ld_index_path("genomes", pop)``.
    """
    index_path = _ld_index_path("genomes", pop)
    return TableResource(path=index_path)
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / gnomad.py View on Github external
"""

    if data_type not in DATA_TYPES:
        raise DataException(f"{data_type} not in {DATA_TYPES}")

    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_release_ht_path(data_type, release))
            for release in releases
        },
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / gnomad_ld.py View on Github external
def ld_scores(pop: str) -> TableResource:
    """
    Build the table resource for the genomes LD scores of a population.

    :param pop: Population identifier, forwarded to ``_ld_scores_path``.
    :return: ``TableResource`` pointing at ``_ld_scores_path("genomes", pop)``.
    """
    scores_path = _ld_scores_path("genomes", pop)
    return TableResource(path=scores_path)
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / gnomad.py View on Github external
"""
    if data_type not in DATA_TYPES:
        raise DataException(f"{data_type} not in {DATA_TYPES}")

    if data_type == "exomes":
        current_release = "2.1"
        releases = EXOME_RELEASES
        releases.remove("2.1.1")
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_coverage_ht_path(data_type, release))
            for release in releases
        },
github macarthur-lab / gnomad_hail / gnomad / resources / grch37 / reference_data.py View on Github external
import_args={
        "path": "gs://gnomad-public/resources/grch37/broad_intervals/hg19-v0-wgs_evaluation_regions.v1.interval_list",
        "reference_genome": "GRCh37",
    },
)

# NA12878 high-confidence intervals table; the source is a GIAB BED file
# imported with hl.import_bed on GRCh37.
na12878_hc_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_intervals.ht",
    import_func=hl.import_bed,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-Solid-10X_CHROM1-X_v3.3_highconf.bed",
        "reference_genome": "GRCh37",
    },
)

# Syndip high-confidence genome intervals table; the source is the
# `hybrid.m37m.bed` BED file imported with hl.import_bed on GRCh37.
syndip_hc_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/syndip/syndip_highconf_genome_intervals.ht",
    import_func=hl.import_bed,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/syndip/hybrid.m37m.bed",
        "reference_genome": "GRCh37",
    },
)


def get_truth_ht() -> hl.Table:
    """
    Returns a table with the following annotations from the latest version of the corresponding truth data:
    - hapmap
    - kgp_omni (1000 Genomes intersection Omni 2.5M array)
    - kgp_phase_1_hc (high confidence sites in 1000 Genomes)
    - mills (Mills & Devine indels)
github macarthur-lab / gnomad_hail / gnomad / resources / grch38 / gnomad.py View on Github external
if data_type not in DATA_TYPES:
        raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_release_ht_path(data_type, release))
            for release in releases
        },