Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main(args):
    """
    Load the dataset to lift over and make sure multi-allelic sites are split.

    Only the part of the function visible in this chunk is covered here.

    :param args: Parsed command-line arguments. Reads `gnomad`, `exomes`,
        `genomes`, `ht` and `mt`.
    :raises ValueError: If `gnomad` is set without `exomes`/`genomes`, or if
        `gnomad` is not set and neither `ht` nor `mt` is supplied.
    """
    hl.init(log='/liftover.log')

    # NOTE(review): `gnomad` and `path` are not read in the visible part of
    # this function; they are kept because later code may depend on them.
    if args.gnomad:
        gnomad = True
        path = None

        # `genomes` wins when both flags are set, matching the previous
        # behavior where the second check overwrote the first.
        if args.genomes:
            data_type = 'genomes'
        elif args.exomes:
            data_type = 'exomes'
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on `data_type` a few lines below.
            raise ValueError(
                'One of exomes or genomes must be specified when working on gnomAD data'
            )

        logger.info('Working on gnomAD {} release ht'.format(data_type))
        logger.info('Reading in release ht')
        t = public_release(data_type).ht()
        logger.info('Variants in release ht: {}'.format(t.count()))

    else:
        data_type = None
        gnomad = False

        # `mt` wins when both inputs are given (as before), but the table is
        # no longer read first only to be discarded.
        if args.mt:
            path = args.mt
            t = hl.read_matrix_table(args.mt)
        elif args.ht:
            path = args.ht
            t = hl.read_table(args.ht)
        else:
            # Previously `t` was left unbound and the split check below
            # failed with an UnboundLocalError.
            raise ValueError(
                'One of ht or mt must be specified when not working on gnomAD data'
            )

    logger.info('Checking if input data has been split')
    if 'was_split' not in t.row:
        # Tables and MatrixTables use different Hail split functions.
        t = hl.split_multi(t) if isinstance(t, hl.Table) else hl.split_multi_hts(t)
import_func=hl.import_vcf,
import_args={
"path": "gs://genomics-public-data/1000-genomes-phase-3/vcf-20150220/ALL.chr*.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf",
"force_bgz": True,
"skip_invalid_loci": True,
"min_partitions": 300,
"reference_genome": "GRCh37",
},
),
},
)
# 1000 Genomes phase 1 high-confidence SNP sites on GRCh37.
# "phase_1_hc" is the only version listed and is also the default.
# The entry declares `import_sites_vcf` as importer for the source VCF.
kgp = VersionedTableResource(
    default_version="phase_1_hc",
    versions={
        "phase_1_hc": TableResource(
            path="gs://gnomad-public/resources/grch37/kgp/1000G_phase1.snps.high_confidence.b37.ht",
            import_func=import_sites_vcf,
            import_args={
                "path": "gs://gnomad-public/resources/grch37/kgp/1000G_phase1.snps.high_confidence.b37.vcf.bgz",
                "force_bgz": True,
                "skip_invalid_loci": True,
                "min_partitions": 100,
                "reference_genome": "GRCh37",
            },
        ),
    },
)
# GRCh37 CpG sites table; only a table path is declared (no importer).
cpg_sites = TableResource(
    path="gs://gnomad-public/resources/grch37/cpg_sites/cpg.ht",
)
# NOTE(review): this resource is declared without a `path` or `import_func`,
# and its import path points at the `decoy_intervals` BED file rather than a
# methylation file. It looks like two separate definitions may have been
# merged here - confirm the intended `path`/`import_func` before relying on it.
methylation_sites = TableResource(
import_args={
"path": "gs://gnomad-public/resources/grch37/decoy_intervals/mm-2-merged.GRCh37_compliant.bed",
"reference_genome": "GRCh37",
},
)
# Purcell 5k intervals table on GRCh37.
# Declares `hl.import_locus_intervals` as importer for the source interval list.
purcell_5k_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/purcell_5k_intervals/purcell5k.ht",
    import_func=hl.import_locus_intervals,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/purcell_5k_intervals/purcell5k.interval_list",
        "reference_genome": "GRCh37",
    },
)
# Segmental duplication intervals table on GRCh37 (hg19 self-chain file).
# Declares `hl.import_bed` as importer for the source BED file.
seg_dup_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/seg_dup_intervals/hg19_self_chain_split_both.ht",
    import_func=hl.import_bed,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/seg_dup_intervals/hg19_self_chain_split_both.bed",
        "reference_genome": "GRCh37",
    },
)
# Exome high-coverage intervals table on GRCh37.
# Declares `hl.import_locus_intervals` as importer for the source interval list.
exome_hc_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/broad_intervals/exomes_high_coverage.auto.interval_list.ht",
    import_func=hl.import_locus_intervals,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/broad_intervals/exomes_high_coverage.auto.interval_list",
        "reference_genome": "GRCh37",
    },
)
"reference_genome": "GRCh37",
},
)
# 1000 Genomes Omni 2.5 sites table on GRCh37 (b37).
# Declares `import_sites_vcf` as importer for the bgzipped source VCF.
kgp_omni = TableResource(
    path="gs://gnomad-public/resources/grch37/kgp/1000G_omni2.5.b37.ht",
    import_func=import_sites_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/kgp/1000G_omni2.5.b37.vcf.bgz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)
# Mills and 1000G gold-standard indels table on GRCh37 (b37).
# Declares `import_sites_vcf` as importer for the bgzipped source VCF.
mills = TableResource(
    path="gs://gnomad-public/resources/grch37/mills/Mills_and_1000G_gold_standard.indels.b37.ht",
    import_func=import_sites_vcf,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/mills/Mills_and_1000G_gold_standard.indels.b37.vcf.bgz",
        "force_bgz": True,
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)
syndip = MatrixTableResource(
path="gs://gnomad-public/resources/grch37/syndip/hybrid.m37m.mt",
import_func=hl.import_vcf,
import_args={
"path": "gs://gnomad-public/resources/grch37/syndip/hybrid.m37m.vcf.bgz",
"min_partitions": 100,
def ld_index(pop: str) -> TableResource:
    """
    Return the table resource located at the genomes LD index path for `pop`.

    :param pop: Population identifier, forwarded unchanged to `_ld_index_path`.
    :return: TableResource whose path is `_ld_index_path("genomes", pop)`.
    """
    index_path = _ld_index_path("genomes", pop)
    return TableResource(path=index_path)
"""
if data_type not in DATA_TYPES:
raise DataException(f"{data_type} not in {DATA_TYPES}")
if data_type == "exomes":
current_release = CURRENT_EXOME_RELEASE
releases = EXOME_RELEASES
else:
current_release = CURRENT_GENOME_RELEASE
releases = GENOME_RELEASES
return VersionedTableResource(
current_release,
{
release: TableResource(path=_public_release_ht_path(data_type, release))
for release in releases
},
def ld_scores(pop: str) -> TableResource:
    """
    Return the table resource located at the genomes LD scores path for `pop`.

    :param pop: Population identifier, forwarded unchanged to `_ld_scores_path`.
    :return: TableResource whose path is `_ld_scores_path("genomes", pop)`.
    """
    scores_path = _ld_scores_path("genomes", pop)
    return TableResource(path=scores_path)
"""
if data_type not in DATA_TYPES:
raise DataException(f"{data_type} not in {DATA_TYPES}")
if data_type == "exomes":
current_release = "2.1"
releases = EXOME_RELEASES
releases.remove("2.1.1")
else:
current_release = CURRENT_GENOME_RELEASE
releases = GENOME_RELEASES
return VersionedTableResource(
current_release,
{
release: TableResource(path=_public_coverage_ht_path(data_type, release))
for release in releases
},
import_args={
"path": "gs://gnomad-public/resources/grch37/broad_intervals/hg19-v0-wgs_evaluation_regions.v1.interval_list",
"reference_genome": "GRCh37",
},
)
# NA12878 GIAB high-confidence intervals table on GRCh37.
# Declares `hl.import_bed` as importer for the source BED file.
na12878_hc_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_intervals.ht",
    import_func=hl.import_bed,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-Solid-10X_CHROM1-X_v3.3_highconf.bed",
        "reference_genome": "GRCh37",
    },
)
# Syndip high-confidence genome intervals table on GRCh37.
# Declares `hl.import_bed` as importer for the source BED file.
syndip_hc_intervals = TableResource(
    path="gs://gnomad-public/resources/grch37/syndip/syndip_highconf_genome_intervals.ht",
    import_func=hl.import_bed,
    import_args={
        "path": "gs://gnomad-public/resources/grch37/syndip/hybrid.m37m.bed",
        "reference_genome": "GRCh37",
    },
)
def get_truth_ht() -> hl.Table:
"""
Returns a table with the following annotations from the latest version of the corresponding truth data:
- hapmap
- kgp_omni (1000 Genomes intersection Omni 2.5M array)
- kgp_phase_1_hc (high confidence sites in 1000 Genomes)
- mills (Mills & Devine indels)
if data_type not in DATA_TYPES:
raise DataException(
f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
)
if data_type == "exomes":
current_release = CURRENT_EXOME_RELEASE
releases = EXOME_RELEASES
else:
current_release = CURRENT_GENOME_RELEASE
releases = GENOME_RELEASES
return VersionedTableResource(
current_release,
{
release: TableResource(path=_public_release_ht_path(data_type, release))
for release in releases
},