How to use the `hail.read_table` function in Hail

To help you get started, we’ve selected a few Hail examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github macarthur-lab / gnomadjs / data / gnomad_r3 / prepare_gnomad_r3_variants.py View on Github external
def main():
    """Read a Hail table, reformat its variants, and write the result.

    Expects two positional CLI arguments: the input table path and the
    output table path.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("input")
    arg_parser.add_argument("output")
    cli_args = arg_parser.parse_args()

    hl.init(log="/tmp/hail.log")

    # Load -> transform -> inspect -> persist.
    table = hl.read_table(cli_args.input)
    table = format_variants_table(table)

    table.describe()

    table.write(cli_args.output)
github hail-is / hail / benchmark / python / benchmark_hail / run / table_benchmarks.py View on Github external
def join_p100_p10(path_10, path_100):
    """Benchmark: join a 100-partition table against a 10-partition one."""
    small = hl.read_table(path_10)
    large = hl.read_table(path_100)
    # _force_count() forces full evaluation of the joined table.
    large.join(small)._force_count()
github Nealelab / UK_Biobank_GWAS / 0.2 / create_ldsc_hm3_table.py View on Github external
###
# Create filtered keytable with autosomal, biallelic HM3 snps
# with MAF > .01, INFO > 0.9 and passing QC in UKBB GWAS analysis set
#
# Also provides both UKBB and HM3 formatting of variant
###

# Skip the whole build if the output table already exists, unless force_all is set.
if gs_missing(BUCKET + HM3_qcpos_stem + '.ht') or force_all:
    
    print("Creating Hail table of HM3 SNPs passing UKBB GWAS QC...")


    # get list of SNPs to be used in GWAS
    # filter here: autosomes, no indels, no MHC (filter early for efficiency)
    ukb_snps = hl.read_table(GWAS_snps).key_by('locus','alleles').repartition(500, shuffle=False)
#    ukb_snps = ukb_snps.annotate(sort_al=hl.sorted(ukb_snps.alleles))
    if verbose:
        print("\nCount 1: " + str(ukb_snps.count()) + '\n')
    
    # Keep true SNPs, drop chromosome X, and drop chr6:25-34Mb (the extended
    # MHC region — presumably GRCh37 coordinates; confirm reference build).
    # NOTE(review): only 'X' is excluded here; Y/MT presumably absent upstream — verify.
    ukb_snps = ukb_snps.filter(
                    hl.is_snp(ukb_snps.alleles[0], ukb_snps.alleles[1]) &
                    (~(ukb_snps.locus.contig == 'X')) &
                    (~( (ukb_snps.locus.contig == '6') & (ukb_snps.locus.position > 25000000) & (ukb_snps.locus.position < 34000000)))
                )
    if verbose:
        print("\nCount 2: " + str(ukb_snps.count()) + '\n')
    
    # merge in, filter on MAF from the UKBB GWAS sample
    # Parse the "variant" string into a locus/alleles struct so the QC table
    # can be keyed the same way as ukb_snps for the join below.
    ukb_qc = hl.import_table(GWAS_qc)
    ukb_qc = ukb_qc.annotate(vstruct = hl.parse_variant(ukb_qc.variant))
    ukb_qc = ukb_qc.annotate(locus = ukb_qc.vstruct.locus, alleles = ukb_qc.vstruct.alleles).key_by('locus','alleles')
github macarthur-lab / hail-elasticsearch-pipelines / luigi_pipeline / seqr_loading.py View on Github external
def run(self):
    """Import a VCF, annotate it for seqr, and write the MatrixTable.

    NOTE(review): the original paste had mangled indentation; formatting is
    normalized here, logic and call order are unchanged.
    """
    mt = self.import_vcf()
    mt = hl.split_multi_hts(mt)

    # Optional validation and sample bookkeeping before annotation.
    if self.validate:
        self.validate_mt(mt, self.genome_version, self.sample_type)
    if self.remap_path:
        mt = self.remap_sample_ids(mt, self.remap_path)
    if self.subset_path:
        mt = self.subset_samples_and_variants(mt, self.subset_path)

    mt = HailMatrixTableTask.run_vep(mt, self.genome_version, self.vep_runner)

    # Reference annotation tables read from configured paths.
    ref_data = hl.read_table(self.reference_ht_path)
    clinvar = hl.read_table(self.clinvar_ht_path)
    hgmd = hl.read_table(self.hgmd_ht_path)

    annotated = SeqrVariantsAndGenotypesSchema(
        mt, ref_data=ref_data, clinvar_data=clinvar, hgmd_data=hgmd
    )
    mt = annotated.annotate_all(overwrite=True).select_annotated_mt()

    mt = SeqrGenotypesSchema(mt).annotate_all(overwrite=True).mt

    mt.describe()
    mt.write(self.output().path, stage_locally=True)
github macarthur-lab / gnomad_hail / utils.py View on Github external
def write_temp_gcs(t: Union[hl.MatrixTable, hl.Table], gcs_path: str,
                   overwrite: bool = False, temp_path: str = '/tmp.h') -> None:
    """Checkpoint *t* to a temporary path, then write the re-read copy to GCS.

    The temp write is always overwritten; the final GCS write honors
    the *overwrite* flag.
    """
    t.write(temp_path, overwrite=True)
    # Re-read with the reader matching the value's type before the final write.
    if isinstance(t, hl.MatrixTable):
        t = hl.read_matrix_table(temp_path)
    else:
        t = hl.read_table(temp_path)
    t.write(gcs_path, overwrite=overwrite)
github macarthur-lab / hail-elasticsearch-pipelines / hail_scripts / v02 / export_table_to_es.py View on Github external
def export_table_to_elasticsearch(table_url, host, index_name, index_type, port=9200, num_shards=1, block_size=200):
    """Load a Hail table from *table_url* and export it to an Elasticsearch index."""
    table = hl.read_table(table_url)

    client = ElasticsearchClient(host, port)
    # Export always recreates the index and copies table globals into index meta.
    client.export_table_to_elasticsearch(
        table,
        index_name=index_name,
        index_type_name=index_type,
        block_size=block_size,
        num_shards=num_shards,
        delete_index_before_exporting=True,
        export_globals_to_index_meta=True,
        verbose=True,
    )
github hail-is / hail / benchmark / python / benchmark_hail / run / table_benchmarks.py View on Github external
def join_p1000_p10(path_10, path_1000):
    """Benchmark: join a 1000-partition table against a 10-partition one."""
    small = hl.read_table(path_10)
    large = hl.read_table(path_1000)
    # _force_count() forces full evaluation of the joined table.
    large.join(small)._force_count()
github Nealelab / UK_Biobank_GWAS / 0.2 / create_ldsc_hm3_table.py View on Github external
# NOTE(review): fragment — the first line below appears to have lost its
# leading indentation during extraction; it presumably belongs inside the
# same enclosing `if` block as the lines that follow.
ukb_qc = ukb_qc.annotate(vstruct = hl.parse_variant(ukb_qc.variant))
    # Key the QC table by locus/alleles and join to the SNP list, keeping minor_AF.
    ukb_qc = ukb_qc.annotate(locus = ukb_qc.vstruct.locus, alleles = ukb_qc.vstruct.alleles).key_by('locus','alleles')
    ukb_qc2 = ukb_snps.join(ukb_qc.select(ukb_qc.minor_AF))
    if verbose:
        print("\nCount 3: " + str(ukb_qc2.count()) + '\n')
    
    # MAF filter (0.01 < minor_AF < 0.99); hl.float() suggests minor_AF was
    # imported as a string — TODO confirm against the import above.
    ukb_qc2 = ukb_qc2.filter(
                    (hl.float(ukb_qc2.minor_AF) > 0.01) & 
                    (hl.float(ukb_qc2.minor_AF) < 0.99)
                )
    if verbose:
        print("\nCount 4: " + str(ukb_qc2.count()) + '\n')
    
    # merge in rsid, info (from full UKB sample)
    # and filter to info > 0.9
    ukb_mfi = hl.read_table(GWAS_mfi).key_by('locus','alleles').repartition(500, shuffle=False)
    ukb_qc3 = ukb_qc2.join(ukb_mfi.select('varid','variant','rsid','info'))
    if verbose:
        print("\nCount 5: " + str(ukb_qc3.count()) + '\n')
    
    # Imputation-quality filter.
    ukb_qc3 = ukb_qc3.filter(
                    (ukb_qc3.info > 0.9)
                )
    if verbose:
        print("\nCount 6: " + str(ukb_qc3.count()) + '\n')
    
    # drop multi-allelic sites
    # Count variants per locus, then keep only loci that appear exactly once.
    loc_count = (ukb_qc3.group_by(ukb_qc3.locus)
                        .aggregate(nloc=hl.agg.count())
                )
    loc_count = loc_count.filter(loc_count.nloc==1)
    # NOTE(review): fragment truncated here — the body of this `if` is cut off.
    if verbose:
github hail-is / hail / benchmark / python / benchmark_hail / run / table_benchmarks.py View on Github external
def read_force_count_p1000(path):
    """Benchmark: read the table at *path* and force-count its rows."""
    table = hl.read_table(path)
    table._force_count()