Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def filter_to_pass_and_reject(in_file, paired, out_dir=None):
"""Filter VCF to only those with a strict PASS/REJECT: somatic + germline.
Removes low quality calls filtered but also labeled with REJECT.
"""
from bcbio.heterogeneity import bubbletree
out_file = "%s-prfilter.vcf.gz" % utils.splitext_plus(in_file)[0]
if out_dir:
out_file = os.path.join(out_dir, os.path.basename(out_file))
if not utils.file_uptodate(out_file, in_file):
with file_transaction(paired.tumor_data, out_file) as tx_out_file:
max_depth = bubbletree.max_normal_germline_depth(in_file, bubbletree.PARAMS, paired)
tx_out_plain = tx_out_file.replace(".vcf.gz", ".vcf")
with contextlib.closing(cyvcf2.VCF(in_file)) as reader:
reader = _add_db_to_header(reader)
with contextlib.closing(cyvcf2.Writer(tx_out_plain, reader)) as writer:
for rec in reader:
filters = rec.FILTER.split(";") if rec.FILTER else []
other_filters = [x for x in filters if x not in ["PASS", ".", "REJECT"]]
if len(other_filters) == 0 or bubbletree.is_info_germline(rec):
# Germline, check if we should include based on frequencies
if "REJECT" in filters or bubbletree.is_info_germline(rec):
stats = bubbletree._is_possible_loh(rec, reader, bubbletree.PARAMS, paired,
use_status=True, max_normal_depth=max_depth)
if stats:
rec.FILTER = "PASS"
rec.INFO["DB"] = True
writer.write_record(rec)
# Somatic, always include
else:
writer.write_record(rec)
vcfutils.bgzip_and_index(tx_out_plain, paired.tumor_data["config"])
import sys
import re
from cyvcf2 import VCF, Writer
date = sys.argv[2]
vcf = VCF(sys.argv[1])
fhs = {'benign': Writer('clinvar-benign.%s.vcf' % date, vcf),
'benign-likely_benign': Writer('clinvar-benign-likely_benign.%s.vcf' % date, vcf),
'pathogenic': Writer('clinvar-pathogenic.%s.vcf' % date, vcf),
'expert': Writer('clinvar-pathogenic-expert.%s.vcf' % date, vcf),
'likely_pathogenic-pathogenic': Writer('clinvar-pathogenic-likely_pathogenic.%s.vcf' % date, vcf),
}
wtr = fhs['pathogenic']
fhs['likely_benign'] = fhs['benign-likely_benign']
fhs['likely_pathogenic'] = fhs['likely_pathogenic-pathogenic']
# benigns also get written just to the joint file
lookup = {}
lookup['benign'] = 'benign-likely_benign'
lookup['pathogenic'] = 'likely_pathogenic-pathogenic'
exclude = "criteria_provided,_conflicting_interpretations no_assertion_criteria_provided no_assertion_provided no_interpretation_for_the_single_variant".split()
for v in vcf:
return smax
if __name__ == "__main__":
import doctest
doctest.testmod()
vcf = VCF(sys.argv[1])
vcf.add_info_to_header({'ID': 'grantham', 'Description': 'grantham score',
'Type':'Integer', 'Number': '1'})
vcf.add_info_to_header({'ID': 'mis_badness', 'Description': 'missense badness score',
'Type':'Float', 'Number': '1'})
vcf.add_info_to_header({'ID': 'blosum', 'Description': 'blosum score',
'Type':'Integer', 'Number': '1'})
wtr = Writer("-", vcf)
for v in vcf:
csqs = v.INFO.get("BCSQ", "").split(",")
smax = None
for csq in csqs:
smax = score(csq, smax)
if not smax:
continue
for k in ('Grantham', 'mis_badness', 'BLOSUM'):
v.INFO[k.lower()] = smax[k]
wtr.write_record(v)
wtr.close()
def _remove_prioritization(in_file, data, out_dir=None):
"""Remove tumor-only prioritization and return non-filtered calls.
"""
out_file = "%s-germline.vcf" % utils.splitext_plus(in_file)[0]
if out_dir:
out_file = os.path.join(out_dir, os.path.basename(out_file))
if not utils.file_uptodate(out_file, in_file) and not utils.file_uptodate(out_file + ".gz", in_file):
with file_transaction(data, out_file) as tx_out_file:
reader = cyvcf2.VCF(str(in_file))
reader.add_filter_to_header({'ID': 'Somatic', 'Description': 'Variant called as Somatic'})
# with open(tx_out_file, "w") as out_handle:
# out_handle.write(reader.raw_header)
with contextlib.closing(cyvcf2.Writer(tx_out_file, reader)) as writer:
for rec in reader:
rec = _update_prioritization_filters(rec)
# out_handle.write(str(rec))
writer.write_record(rec)
return out_file
def write_pass_vcf(annotated_vcf, logger):
out_vcf = re.sub(r'\.annotated\.vcf\.gz$','.annotated.pass.vcf',annotated_vcf)
vcf = VCF(annotated_vcf)
w = Writer(out_vcf, vcf)
num_rejected = 0
num_pass = 0
for rec in vcf:
if rec.FILTER is None or rec.FILTER == 'None':
w.write_record(rec)
num_pass += 1
else:
num_rejected +=1
vcf.close()
w.close()
logger.info('Number of non-PASS/REJECTED variant calls: ' + str(num_rejected))
logger.info('Number of PASSed variant calls: ' + str(num_pass))
if num_pass == 0:
vcf_infotags_meta = annoutils.read_infotag_file(os.path.join(gvanno_db_directory,'gvanno_infotags.tsv'))
out_vcf = re.sub(r'\.vcf(\.gz){0,}$','.annotated.vcf',query_vcf)
meta_vep_dbnsfp_info = annoutils.vep_dbnsfp_meta_vcf(query_vcf, vcf_infotags_meta)
dbnsfp_prediction_algorithms = meta_vep_dbnsfp_info['dbnsfp_prediction_algorithms']
vep_csq_fields_map = meta_vep_dbnsfp_info['vep_csq_fieldmap']
vcf = VCF(query_vcf)
for tag in vcf_infotags_meta:
if lof_prediction == 0:
if not tag.startswith('LoF'):
vcf.add_info_to_header({'ID': tag, 'Description': str(vcf_infotags_meta[tag]['description']),'Type':str(vcf_infotags_meta[tag]['type']), 'Number': str(vcf_infotags_meta[tag]['number'])})
else:
vcf.add_info_to_header({'ID': tag, 'Description': str(vcf_infotags_meta[tag]['description']),'Type':str(vcf_infotags_meta[tag]['type']), 'Number': str(vcf_infotags_meta[tag]['number'])})
w = Writer(out_vcf, vcf)
current_chrom = None
num_chromosome_records_processed = 0
gvanno_xref_map = {'ENSEMBL_TRANSCRIPT_ID':0, 'ENSEMBL_GENE_ID':1, 'ENSEMBL_PROTEIN_ID':2,
'SYMBOL':3, 'SYMBOL_ENTREZ':4,'ENTREZ_ID':5, 'UNIPROT_ID':6, 'APPRIS':7,
'UNIPROT_ACC':8,'REFSEQ_MRNA':9, 'CORUM_ID':10,'TUMOR_SUPPRESSOR':11,
'TUMOR_SUPPRESSOR_EVIDENCE':12, 'ONCOGENE':13, 'ONCOGENE_EVIDENCE':14,'DISGENET_CUI':15,
'MIM_PHENOTYPE_ID':16, 'OPENTARGETS_DISEASE_ASSOCS':17,
'OPENTARGETS_TRACTABILITY_COMPOUND':18, 'OPENTARGETS_TRACTABILITY_ANTIBODY':19,
'PROB_HAPLOINSUFFICIENCY': 20,'PROB_EXAC_LOF_INTOLERANT':21,'PROB_EXAC_LOF_INTOLERANT_HOM':22,
'PROB_EXAC_LOF_TOLERANT_NULL':23,'PROB_EXAC_NONTCGA_LOF_INTOLERANT':24,
'PROB_EXAC_NONTCGA_LOF_INTOLERANT_HOM':25, 'PROB_EXAC_NONTCGA_LOF_TOLERANT_NULL': 26,
'PROB_GNOMAD_LOF_INTOLERANT':27, 'PROB_GNOMAD_LOF_INTOLERANT_HOM': 28, 'PROB_GNOMAD_LOF_TOLERANT_NULL':29,
'ESSENTIAL_GENE_CRISPR': 30, 'ESSENTIAL_GENE_CRISPR2': 31}
vcf_info_element_types = {}
for e in vcf.header_iter():
def to_vcf(self, path):
"""
Parse query result as vcf file.
Args:
path: path of the file.
"""
from cyvcf2 import Writer
writer = Writer(path, self.vcf)
for v in self:
writer.write_record(v.source)