How to use the medaka.vcf.Variant function in medaka

To help you get started, we’ve selected a few medaka examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nanoporetech / medaka / medaka / vcf.py View on Github external
if alts[0] == alts[1]:  # homozygous 1/1
            gt = gt_sep.join(len(haps) * '1')
            alts = alts[:1]
        else:  # heterozygous 1/2
            gt = gt_sep.join(map(str, haps))
    else:  # heterozygous 0/1
        assert len(haps) == 1
        gts = [0, 1]  # appropriate if hap with variant is 2
        if not discard_phase:
            if int(haps[0]) == 1:
                gts = [1, 0]
        gt = gt_sep.join(map(str, gts))

    genotype_data = {'GT': gt, 'GQ': qual}

    return Variant(
        v.chrom, interval.begin, ref, alt=alts,
        filt='PASS', info=info, qual=qual,
        genotype_data=genotype_data).trim()
github nanoporetech / medaka / medaka / labels.py View on Github external
results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))

            # heterozygous, one deletion
            elif all((is_het,
                      contains_nonref,
                      contains_deletion)):

                qual = self._pfmt(self._phred(1 - primary_prob))
                alt = [s for s in call if s != '*']
                gt = '1/1'
                genotype = {'GT': gt, 'GQ': qual}

                results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))

            # no snp at this location
            else:
                if return_all:
                    qual = self._pfmt(ref_prob)
                    # return variant even though it is not a snp
                    genotype = {'GT': '0/0', 'GQ': qual}
                    results.append(medaka.vcf.Variant(
                        ref_name, pos, ref_symbol, alt='.', filt='PASS',
                        info=info, qual=qual, genotype_data=genotype))
        return results
github nanoporetech / medaka / medaka / vcf.py View on Github external
def _parse(self):
        # just parsing the file to yield records
        last_pos = [None, None]
        with open(self.filename, encoding='utf-8') as handle:
            for index, line in enumerate(handle):
                line = line.replace('\n', '')

                # Already read meta and header in self.__init__
                if line.startswith('#'):
                    continue

                try:
                    variant = Variant.from_text(line)
                except Exception as e:
                    raise IOError(
                        'Exception while reading variant #{}.\n'
                        'Line: {}'.format(index, line)) from e

                if variant.chrom != last_pos[0]:
                    last_pos = [variant.chrom, None]
                elif last_pos[1] is not None and last_pos[1] > variant.pos:
                    raise IOError(
                        '.vcf is unsorted at index #{}.'.format(index))
                if variant.chrom not in self.chroms:
                    self.chroms.append(variant.chrom)
                yield variant
                last_pos[1] = variant.pos
github nanoporetech / medaka / medaka / labels.py View on Github external
genotype = {'GT': '1/1', 'GQ': qual}
                results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))

            # heterozygous, no deletions
            elif all((is_het,
                      not contains_deletion)):

                err = 1 - 0.5 * (primary_prob + secondary_prob)
                qual = self._pfmt(self._phred(err))
                alt = [s for s in call if s != ref_symbol]
                gt = '0/1' if len(alt) == 1 else '1/2'
                genotype = {'GT': gt, 'GQ': qual}

                results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))

            # heterozygous, one deletion
            elif all((is_het,
                      contains_nonref,
                      contains_deletion)):

                qual = self._pfmt(self._phred(1 - primary_prob))
                alt = [s for s in call if s != '*']
                gt = '1/1'
                genotype = {'GT': gt, 'GQ': qual}

                results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))
github nanoporetech / medaka / medaka / labels.py View on Github external
genotype = {'GT': gt, 'GQ': qual}

                results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))

            # heterozygous, secondary deletion
            elif all((not primary_is_reference,
                      not primary_is_deletion,
                      secondary_is_deletion,
                      secondary_exceeds_threshold)):

                alt = primary_call
                qual = self._pfmt(self._phred(1 - primary_prob))
                genotype = {'GT': '1/1', 'GQ': qual}
                results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))

            # no snp at this location
            else:
                if return_all:
                    # return variant even though it is not a snp
                    qual = self._pfmt(self._phred(1 - primary_prob))
                    genotype = {'GT': '0/0', 'GQ': qual}
                    results.append(medaka.vcf.Variant(
                        ref_name, pos, ref_symbol, alt='.', filt='PASS',
                        info=info, qual=qual, genotype_data=genotype))
        return results
github nanoporetech / medaka / medaka / labels.py View on Github external
}
            else:
                info = {}

            # log likelihood ratio
            qual = self._pfmt(sum(pred_quals) - sum(ref_quals))
            genotype = {'GT': '1', 'GQ': qual}

            var_pos = pos['major'][start]

            if pad_del_at_start_of_chunk:
                var_pos -= 1
                var_ref = ref_seq[var_pos] + var_ref
                var_pred = ref_seq[var_pos] + var_pred

            variant = medaka.vcf.Variant(sample.ref_name, var_pos, var_ref,
                                         alt=var_pred, filt='PASS', info=info,
                                         qual=qual, genotype_data=genotype)
            variant = variant.trim()
            variants.append(variant)

        return variants
github nanoporetech / medaka / medaka / labels.py View on Github external
results = list()
        data = zip(outputs, argmax, probs, quals, positions, ref_symbols)
        for network_output, amax, prob, qual, pos, ref_symbol in data:
            call = self._decoding[amax]

            # notes: we output only variants with one or more substitutions,
            #        and deletions are masked from the records. TODO: is this
            #        really the behaviour we want?
            if call == (ref_symbol, ref_symbol):  # is reference
                if return_all:
                    # return variant even though it is not a snp
                    q = self._pfmt(qual)
                    genotype = {'GT': '0/0', 'GQ': q}
                    info = _make_info(ref_symbol, prob, call)
                    results.append(medaka.vcf.Variant(
                        ref_name, pos, ref_symbol, alt='.', filt='PASS',
                        info=info, qual=q, genotype_data=genotype))
            else:  # is variant
                contains_deletion = '*' in call
                if not self._singleton(call):  # heterozygous
                    if not contains_deletion:
                        alt = [s for s in call if s != ref_symbol]
                        gt = '0/1' if len(alt) == 1 else '1/2'
                        q = self._pfmt(qual)
                        genotype = {'GT': gt, 'GQ': q}
                        info = _make_info(ref_symbol, prob, call)
                        results.append(medaka.vcf.Variant(
                            ref_name, pos, ref_symbol, alt, filt='PASS',
                            info=info, qual=q, genotype_data=genotype))
                    else:  # with deletion
                        # disallow (ref, *), and record (alt, *) as (alt, alt)
github nanoporetech / medaka / medaka / labels.py View on Github external
qual = self._pfmt(self._phred(1 - primary_prob))
                alt = [s for s in call if s != '*']
                gt = '1/1'
                genotype = {'GT': gt, 'GQ': qual}

                results.append(medaka.vcf.Variant(
                    ref_name, pos, ref_symbol, alt, filt='PASS',
                    info=info, qual=qual, genotype_data=genotype))

            # no snp at this location
            else:
                if return_all:
                    qual = self._pfmt(ref_prob)
                    # return variant even though it is not a snp
                    genotype = {'GT': '0/0', 'GQ': qual}
                    results.append(medaka.vcf.Variant(
                        ref_name, pos, ref_symbol, alt='.', filt='PASS',
                        info=info, qual=qual, genotype_data=genotype))
        return results
github nanoporetech / medaka / medaka / labels.py View on Github external
q = self._pfmt(qual)
                    genotype = {'GT': '0/0', 'GQ': q}
                    info = _make_info(ref_symbol, prob, call)
                    results.append(medaka.vcf.Variant(
                        ref_name, pos, ref_symbol, alt='.', filt='PASS',
                        info=info, qual=q, genotype_data=genotype))
            else:  # is variant
                contains_deletion = '*' in call
                if not self._singleton(call):  # heterozygous
                    if not contains_deletion:
                        alt = [s for s in call if s != ref_symbol]
                        gt = '0/1' if len(alt) == 1 else '1/2'
                        q = self._pfmt(qual)
                        genotype = {'GT': gt, 'GQ': q}
                        info = _make_info(ref_symbol, prob, call)
                        results.append(medaka.vcf.Variant(
                            ref_name, pos, ref_symbol, alt, filt='PASS',
                            info=info, qual=q, genotype_data=genotype))
                    else:  # with deletion
                        # disallow (ref, *), and record (alt, *) as (alt, alt)
                        contains_nonref_nondel = len(
                            [s for s in call
                                if s != ref_symbol and s != '*']) > 0
                        if contains_nonref_nondel:
                            alt = [s for s in call if s != '*']
                            gt = '1/1'
                            q = self._pfmt(qual)
                            genotype = {'GT': gt, 'GQ': q}
                            info = _make_info(ref_symbol, prob, call)
                            results.append(medaka.vcf.Variant(
                                ref_name, pos, ref_symbol, alt, filt='PASS',
                                info=info, qual=q, genotype_data=genotype))