How to use the cyvcf2.Writer function in cyvcf2

To help you get started, we’ve selected a few cyvcf2 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bcbio / bcbio-nextgen / bcbio / variation / germline.py View on Github external
def filter_to_pass_and_reject(in_file, paired, out_dir=None):
    """Filter VCF to only those with a strict PASS/REJECT: somatic + germline.

    Removes low quality calls filtered but also labeled with REJECT.
    """
    from bcbio.heterogeneity import bubbletree
    out_file = "%s-prfilter.vcf.gz" % utils.splitext_plus(in_file)[0]
    if out_dir:
        out_file = os.path.join(out_dir, os.path.basename(out_file))
    if not utils.file_uptodate(out_file, in_file):
        with file_transaction(paired.tumor_data, out_file) as tx_out_file:
            max_depth = bubbletree.max_normal_germline_depth(in_file, bubbletree.PARAMS, paired)
            tx_out_plain = tx_out_file.replace(".vcf.gz", ".vcf")
            with contextlib.closing(cyvcf2.VCF(in_file)) as reader:
                reader = _add_db_to_header(reader)
                with contextlib.closing(cyvcf2.Writer(tx_out_plain, reader)) as writer:
                    for rec in reader:
                        filters = rec.FILTER.split(";") if rec.FILTER else []
                        other_filters = [x for x in filters if x not in ["PASS", ".", "REJECT"]]
                        if len(other_filters) == 0 or bubbletree.is_info_germline(rec):
                            # Germline, check if we should include based on frequencies
                            if "REJECT" in filters or bubbletree.is_info_germline(rec):
                                stats = bubbletree._is_possible_loh(rec, reader, bubbletree.PARAMS, paired,
                                                                    use_status=True, max_normal_depth=max_depth)
                                if stats:
                                    rec.FILTER = "PASS"
                                    rec.INFO["DB"] = True
                                    writer.write_record(rec)
                            # Somatic, always include
                            else:
                                writer.write_record(rec)
            vcfutils.bgzip_and_index(tx_out_plain, paired.tumor_data["config"])
github quinlan-lab / pathoscore / truth-sets / GRCh37 / clinvar / make.py View on Github external
import sys
import re
from cyvcf2 import VCF, Writer

date = sys.argv[2]

vcf = VCF(sys.argv[1])

fhs = {'benign': Writer('clinvar-benign.%s.vcf' % date, vcf),
       'benign-likely_benign': Writer('clinvar-benign-likely_benign.%s.vcf' % date, vcf),
       'pathogenic': Writer('clinvar-pathogenic.%s.vcf' % date, vcf),
       'expert': Writer('clinvar-pathogenic-expert.%s.vcf' % date, vcf),
       'likely_pathogenic-pathogenic': Writer('clinvar-pathogenic-likely_pathogenic.%s.vcf' % date, vcf),
       }
wtr = fhs['pathogenic']

fhs['likely_benign'] = fhs['benign-likely_benign']
fhs['likely_pathogenic'] = fhs['likely_pathogenic-pathogenic']

# benigns also get written just to the joint file
lookup = {}
lookup['benign'] = 'benign-likely_benign'
lookup['pathogenic'] = 'likely_pathogenic-pathogenic'

exclude = "criteria_provided,_conflicting_interpretations no_assertion_criteria_provided no_assertion_provided no_interpretation_for_the_single_variant".split()

for v in vcf:
github quinlan-lab / pathoscore / truth-sets / score.py View on Github external
return smax

if __name__ == "__main__":
    import doctest
    doctest.testmod()

    vcf = VCF(sys.argv[1])
    vcf.add_info_to_header({'ID': 'grantham', 'Description': 'grantham score',
                            'Type':'Integer', 'Number': '1'})
    vcf.add_info_to_header({'ID': 'mis_badness', 'Description': 'missense badness score',
                            'Type':'Float', 'Number': '1'})
    vcf.add_info_to_header({'ID': 'blosum', 'Description': 'blosum score',
                            'Type':'Integer', 'Number': '1'})


    wtr = Writer("-", vcf)

    for v in vcf:
        csqs = v.INFO.get("BCSQ", "").split(",")
        smax = None
        for csq in csqs:
            smax = score(csq, smax)
        if not smax:
            continue

        for k in ('Grantham', 'mis_badness', 'BLOSUM'):
            v.INFO[k.lower()] = smax[k]

        wtr.write_record(v)
    wtr.close()
github bcbio / bcbio-nextgen / bcbio / variation / germline.py View on Github external
def _remove_prioritization(in_file, data, out_dir=None):
    """Remove tumor-only prioritization and return non-filtered calls.
    """
    out_file = "%s-germline.vcf" % utils.splitext_plus(in_file)[0]
    if out_dir:
        out_file = os.path.join(out_dir, os.path.basename(out_file))
    if not utils.file_uptodate(out_file, in_file) and not utils.file_uptodate(out_file + ".gz", in_file):
        with file_transaction(data, out_file) as tx_out_file:
            reader = cyvcf2.VCF(str(in_file))
            reader.add_filter_to_header({'ID': 'Somatic', 'Description': 'Variant called as Somatic'})
            # with open(tx_out_file, "w") as out_handle:
            #     out_handle.write(reader.raw_header)
            with contextlib.closing(cyvcf2.Writer(tx_out_file, reader)) as writer:
                for rec in reader:
                    rec = _update_prioritization_filters(rec)
                    # out_handle.write(str(rec))
                    writer.write_record(rec)
    return out_file
github sigven / gvanno / src / gvanno / lib / annoutils.py View on Github external
def write_pass_vcf(annotated_vcf, logger):
   
   out_vcf = re.sub(r'\.annotated\.vcf\.gz$','.annotated.pass.vcf',annotated_vcf)
   vcf = VCF(annotated_vcf)
   w = Writer(out_vcf, vcf)

   num_rejected = 0
   num_pass = 0
   for rec in vcf:
      if rec.FILTER is None or rec.FILTER == 'None':
         w.write_record(rec)
         num_pass += 1
      else:
         num_rejected +=1

   vcf.close()
   w.close()
   
   logger.info('Number of non-PASS/REJECTED variant calls: ' + str(num_rejected))
   logger.info('Number of PASSed variant calls: ' + str(num_pass))
   if num_pass == 0:
github sigven / gvanno / src / gvanno / gvanno_summarise.py View on Github external
vcf_infotags_meta = annoutils.read_infotag_file(os.path.join(gvanno_db_directory,'gvanno_infotags.tsv'))
   out_vcf = re.sub(r'\.vcf(\.gz){0,}$','.annotated.vcf',query_vcf)

   meta_vep_dbnsfp_info = annoutils.vep_dbnsfp_meta_vcf(query_vcf, vcf_infotags_meta)
   dbnsfp_prediction_algorithms = meta_vep_dbnsfp_info['dbnsfp_prediction_algorithms']
   vep_csq_fields_map = meta_vep_dbnsfp_info['vep_csq_fieldmap']
   vcf = VCF(query_vcf)
   for tag in vcf_infotags_meta:
      if lof_prediction == 0:
         if not tag.startswith('LoF'):
            vcf.add_info_to_header({'ID': tag, 'Description': str(vcf_infotags_meta[tag]['description']),'Type':str(vcf_infotags_meta[tag]['type']), 'Number': str(vcf_infotags_meta[tag]['number'])})
      else:
         vcf.add_info_to_header({'ID': tag, 'Description': str(vcf_infotags_meta[tag]['description']),'Type':str(vcf_infotags_meta[tag]['type']), 'Number': str(vcf_infotags_meta[tag]['number'])})

   
   w = Writer(out_vcf, vcf)
   current_chrom = None
   num_chromosome_records_processed = 0
   gvanno_xref_map = {'ENSEMBL_TRANSCRIPT_ID':0, 'ENSEMBL_GENE_ID':1, 'ENSEMBL_PROTEIN_ID':2, 
                     'SYMBOL':3, 'SYMBOL_ENTREZ':4,'ENTREZ_ID':5, 'UNIPROT_ID':6, 'APPRIS':7,
                     'UNIPROT_ACC':8,'REFSEQ_MRNA':9, 'CORUM_ID':10,'TUMOR_SUPPRESSOR':11,
                     'TUMOR_SUPPRESSOR_EVIDENCE':12, 'ONCOGENE':13, 'ONCOGENE_EVIDENCE':14,'DISGENET_CUI':15,
                     'MIM_PHENOTYPE_ID':16, 'OPENTARGETS_DISEASE_ASSOCS':17,
                     'OPENTARGETS_TRACTABILITY_COMPOUND':18, 'OPENTARGETS_TRACTABILITY_ANTIBODY':19,
                     'PROB_HAPLOINSUFFICIENCY': 20,'PROB_EXAC_LOF_INTOLERANT':21,'PROB_EXAC_LOF_INTOLERANT_HOM':22,
                     'PROB_EXAC_LOF_TOLERANT_NULL':23,'PROB_EXAC_NONTCGA_LOF_INTOLERANT':24,
                     'PROB_EXAC_NONTCGA_LOF_INTOLERANT_HOM':25, 'PROB_EXAC_NONTCGA_LOF_TOLERANT_NULL': 26,
                     'PROB_GNOMAD_LOF_INTOLERANT':27, 'PROB_GNOMAD_LOF_INTOLERANT_HOM': 28, 'PROB_GNOMAD_LOF_TOLERANT_NULL':29,
                     'ESSENTIAL_GENE_CRISPR': 30, 'ESSENTIAL_GENE_CRISPR2': 31}
   
   vcf_info_element_types = {}
   for e in vcf.header_iter():
github kipoi / kipoiseq / kipoiseq / extractors / vcf_query.py View on Github external
def to_vcf(self, path):
        """
        Parse query result as vcf file.

        Args:
          path: path of the file.
        """
        from cyvcf2 import Writer
        writer = Writer(path, self.vcf)
        for v in self:
            writer.write_record(v.source)

cyvcf2

fast vcf parsing with cython + htslib

MIT
Latest version published 6 months ago

Package Health Score

65 / 100
Full package analysis

Similar packages