How to use the medaka.common.reverse_complement function in medaka

To help you get started, we’ve selected a few medaka examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nanoporetech / medaka / medaka / smolecule.py View on Github external
# align False requires forked minimap2, and isn't much faster for
            # a small number of sequences due to index construction time.
            warnings.warn("`align` is ignored", DeprecationWarning)
        alignments = []
        aligner = mappy.Aligner(seq=template, preset='map-ont')
        for sr in self.subreads:
            try:
                hit = next(aligner.map(sr.seq))
            except StopIteration:
                continue
            else:
                flag = 0 if hit.strand == 1 else 16
                if hit.strand == 1:
                    seq = sr.seq
                else:
                    seq = medaka.common.reverse_complement(sr.seq)
                clip = [
                    '' if x == 0 else '{}S'.format(x)
                    for x in (hit.q_st, len(sr.seq) - hit.q_en)]
                if hit.strand == -1:
                    clip = clip[::-1]
                cigstr = ''.join((clip[0], hit.cigar_str, clip[1]))
                aln = Alignment(
                    template_name, sr.name, flag, hit.r_st, seq, cigstr)
                alignments.append(aln)
                hit = None
        return alignments
github nanoporetech / medaka / medaka / rle.py View on Github external
fast5_path = fast5_path[0]

        try:
            fast5_call, wl, wk = get_rl_params(
                alignment.query_name, fast5_path)
        except KeyError:
            msg = 'RLE table not found for file {}, read {}'
            logger.info(msg.format(fast5_path, alignment.query_name))
            return None

        # params needs flipping for reverse alignments
        if alignment.is_reverse:
            wl = wl[::-1]
            wk = wk[::-1]
            fast5_call = medaka.common.reverse_complement(fast5_call)

        # ocasional errors where bases are repeated
        # may lead to discrepancies, discard these
        if fast5_call != query_rle.compact_basecall:
            logger.warning(
                'RLE table within fast5 file is inconsistent with '
                'compressed basecall for read {}. {} != {}'.format(
                    alignment.query_name,
                    fast5_call,
                    query_rle.compact_basecall))
            return None
        tags = {'WL': wl, 'WK': wk}
    else:
        tags = dict()

    # Create alignment object
github nanoporetech / medaka / medaka / smolecule.py View on Github external
def align_to_template(self, template, template_name):
        """Align subreads to a template sequence using Smith-Waterman.

        :param template: sequence to which to align subreads.
        :param template_name: name of template sequence.

        :returns: `Alignment` tuples.

        """
        self.initialize()
        alignments = []
        for orient, sr in zip(self._orient, self.subreads):
            if orient:
                seq = sr.seq
            else:
                seq = medaka.common.reverse_complement(sr.seq)
            result = parasail.sw_trace_striped_16(
                seq, template, 8, 4, parasail.dnafull)
            if result.cigar.beg_ref >= result.end_ref or \
                    result.cigar.beg_query >= result.end_query:
                # unsure why this can happen
                continue
            rstart, cigar = parasail_to_sam(result, seq)
            flag = 0 if orient else 16
            aln = Alignment(template_name, sr.name, flag, rstart, seq, cigar)
            alignments.append(aln)
        return alignments
github nanoporetech / medaka / medaka / smolecule.py View on Github external
def orient_subreads(self):
        """Find orientation of subreads with respect to consensus sequence.

        :returns: `Alignment` s of subreads to consensus.

        """
        # TODO: use a profile here
        # TODO: refactor with align_to_template
        self._orient = []
        alignments = []
        for sr in self.subreads:
            rc_seq = medaka.common.reverse_complement(sr.seq)
            result_fwd = parasail.sw_trace_striped_16(
                sr.seq, self.consensus, 8, 4, parasail.dnafull)
            result_rev = parasail.sw_trace_striped_16(
                rc_seq, self.consensus, 8, 4, parasail.dnafull)
            is_fwd = result_fwd.score > result_rev.score
            self._orient.append(is_fwd)
            result = result_fwd if is_fwd else result_rev
            seq = sr.seq if is_fwd else rc_seq
            if result.cigar.beg_ref >= result.end_ref or \
                    result.cigar.beg_query >= result.end_query:
                # unsure why this can happen
                continue
            rstart, cigar = parasail_to_sam(result, seq)
            flag = 0 if is_fwd else 16
            aln = Alignment(
                'consensus_{}'.format(self.name), sr.name,
github nanoporetech / medaka / medaka / methdaka.py View on Github external
:param read: a `Read` namedtuple.
    :param tags: list containing additional tags to add to sam record.

    :returns: a (string) sam record.
    """
    try:
        align = list(aligner.map(read.sequence, MD=True, cs=True))[0]
    except IndexError:
        return None
    if align.strand == +1:
        flag = '0'
        seq = read.sequence
        qstring = read.quality
    else:
        flag = '16'
        seq = medaka.common.reverse_complement(read.sequence)
        qstring = read.quality[::-1]
    rname = align.ctg
    pos = str(align.r_st + 1)
    mapq = str(align.mapq)
    clip = [
        '' if x == 0 else '{}S'.format(x)
        for x in (align.q_st, len(seq) - align.q_en)]
    if align.strand == -1:
        clip = clip[::-1]
    cigar = clip[0] + align.cigar_str + clip[1]
    NM = 'NM:i:' + str(align.NM)

    # NOTE: tags written without reversal, see below
    sam = '\t'.join((
        read.read_id, flag, rname, pos, mapq, cigar,
        '*', '0', '0', seq, qstring, NM, *tags))
github nanoporetech / medaka / medaka / smolecule.py View on Github external
def poa_consensus(self, additional_seq=None, method='racon'):
        """Create a consensus sequence for the read."""
        self.initialize()
        with tempfile.NamedTemporaryFile(
                'w', suffix='.fasta', delete=False) as fh:
            if additional_seq is not None:
                fh.write(">{}\n{}\n".format('additional', additional_seq))
            for orient, subread in zip(self._orient, self.subreads):
                if method == 'spoa':
                    if orient:
                        seq = subread.seq
                    else:
                        seq = medaka.common.reverse_complement(subread.seq)
                else:
                    seq = subread.seq
                fh.write(">{}\n{}\n".format(subread.name, seq))
            fh.flush()

            if method == 'spoa':
                consensus_seq = self._run_spoa(fh.name)
            elif method == 'racon':
                consensus_seq = self._run_racon(fh.name)
            else:
                raise ValueError('Unrecognised method: {}.'.format(method))
        self.consensus = consensus_seq
        self._alignments_valid = False
        self.consensus_run = True
        return consensus_seq