Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main(args):
hl.init(log='/liftover.log')
if args.gnomad:
gnomad = True
path = None
if args.exomes:
data_type = 'exomes'
if args.genomes:
data_type = 'genomes'
logger.info('Working on gnomAD {} release ht'.format(data_type))
logger.info('Reading in release ht')
t = public_release(data_type).ht()
logger.info('Variants in release ht: {}'.format(t.count()))
else:
data_type = None
gnomad = False
if args.ht:
path = args.ht
t = hl.read_table(args.ht)
if args.mt:
path = args.mt
t = hl.read_matrix_table(args.mt)
logger.info('Checking if input data has been split')
if 'was_split' not in t.row:
t = hl.split_multi(t) if isinstance(t, hl.Table) else hl.split_multi_hts(t)
def get_r_within_gene(bm: BlockMatrix, ld_index: hl.Table, gene: str, vep_ht: hl.Table = None, reference_genome: str = None):
"""
Gets LD information (`r`) for all pairs of variants within `gene`.
Warning: this returns a table quadratic in number of variants. Exercise caution with large genes.
:param bm: Input Block Matrix
:param ld_index: Corresponding index table
:param gene: Gene symbol as string
:param vep_ht: Table with VEP annotations (if None, gets from get_gnomad_public_data())
:param reference_genome: Reference genome to pass to get_gene_intervals for fast filtering to gene
:return: Table with pairs of variants
"""
if vep_ht is None:
vep_ht = public_release('exomes').ht()
if reference_genome is None:
reference_genome = hl.default_reference().name
intervals = hl.experimental.get_gene_intervals(gene_symbols=[gene], reference_genome=reference_genome)
ld_index = hl.filter_intervals(ld_index, intervals)
ld_index = ld_index.annotate(vep=vep_ht[ld_index.key].vep)
ld_index = ld_index.filter(hl.any(lambda tc: tc.gene_symbol == gene, ld_index.vep.transcript_consequences))
indices_to_keep = ld_index.idx.collect()
filt_bm = bm.filter(indices_to_keep, indices_to_keep)
ht = filt_bm.entries()
ld_index = ld_index.add_index('new_idx').key_by('new_idx')
return ht.transmute(r=ht.entry, i_variant=ld_index[ht.i], j_variant=ld_index[ht.j])