How to use the medaka.common.Relationship function in medaka

To help you get started, we’ve selected a few medaka examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nanoporetech / medaka / medaka / variant.py View on Github external
start_2 = None

    for s2 in itertools.chain(sample_gen, (None,)):
        s1_name = 'Unknown' if s1 is None else s1.name
        s2_name = 'Unknown' if s2 is None else s2.name

        is_last_in_contig = False
        # s1 is last chunk
        if s2 is None:
            # go to end of s1
            end_1 = None
            is_last_in_contig = True
        else:
            rel = medaka.common.Sample.relative_position(s1, s2)
            # skip s2 if it is contained within s1
            if rel is medaka.common.Relationship.s2_within_s1:
                logger.info('{} is contained within {}, skipping.'.format(
                    s2_name, s1_name))
                continue
            elif rel is medaka.common.Relationship.forward_overlap:
                end_1, start_2, _ = medaka.common.Sample.overlap_indices(
                    s1, s2)
            elif rel is medaka.common.Relationship.forward_gapped:
                is_last_in_contig = True
                end_1, start_2 = (None, None)
                msg = '{} and {} cannot be concatenated as there is ' + \
                      'no overlap and they do not abut.'
                logger.info(msg.format(s1_name, s2_name))
            else:
                raise RuntimeError(
                    'Unexpected sample relationship {} '
                    'between {} and {}'.format(repr(rel), s1.name, s2.name))
github nanoporetech / medaka / medaka / common.py View on Github external
else:
                return Relationship.s1_within_s2

        # do samples abut?
        elif ordered_abuts(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_abutted
            else:
                return Relationship.reverse_abutted

        # do samples overlap?
        elif ordered_overlaps(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_overlap
            else:
                return Relationship.reverse_overlap

        # if we got this far there should be a gap between s1_ord and s2_ord
        elif ordered_gapped(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_gapped
            else:
                return Relationship.reverse_gapped

        else:
            raise RuntimeError(
                'Could not calculate relative position of {} and {}'.format(
                    s1.name, s2.name))
github nanoporetech / medaka / medaka / common.py View on Github external
:param s1: First `Sample` object.
        :param s2: Second `Sample` object.

        :returns: (end1, start2, heuristic)
        :raises: `OverlapException` if samples do not overlap nor abut.

        """
        heuristic = False
        rel = Sample.relative_position(s1, s2)

        # trivial case
        if rel is Relationship.forward_abutted:
            return None, None, heuristic

        if rel is not Relationship.forward_overlap:
            msg = 'Cannot overlap samples {} and {} with relationhip {}'
            raise OverlapException(msg.format(s1.name, s2.name, repr(rel)))

        # find where the overlap starts (ends) in s1 (s2) indices
        ovl_start_ind1 = np.searchsorted(s1.positions, s2.positions[0])
        ovl_end_ind2 = np.searchsorted(
            s2.positions, s1.positions[-1], side='right')

        end_1_ind, start_2_ind = None, None
        pos1_ovl = s1.positions[ovl_start_ind1:]
        pos2_ovl = s2.positions[0:ovl_end_ind2]
        try:
            # the nice case where everything lines up
            if not np.array_equal(pos1_ovl['minor'], pos2_ovl['minor']):
                raise OverlapException("Overlaps are not equal in structure")
            overlap_len = len(pos1_ovl)
github nanoporetech / medaka / medaka / common.py View on Github external
"""Check for gap between end of s1 and start of s2."""
            s1_end_maj, s1_end_min = s1.last_pos
            s2_start_maj, s2_start_min = s2.first_pos
            gapped = False
            if s2_start_maj > s1_end_maj + 1:  # gap in major
                gapped = True
            elif (s2_start_maj > s1_end_maj and
                    s2_start_min > 0):  # missing minors
                gapped = True
            elif (s2_start_maj == s1_end_maj and
                    s2_start_min > s1_end_min + 1):  # missing minors
                gapped = True
            return gapped

        if s1.ref_name != s2.ref_name:  # different ref_names
            return Relationship.different_ref_name

        s1_ord, s2_ord = sorted((s1, s2), key=lambda x: (x.first_pos, -x.size))
        is_ordered = s1_ord.name == s1.name

        # is one sample within the other?
        if ordered_contained(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.s2_within_s1
            else:
                return Relationship.s1_within_s2

        # do samples abut?
        elif ordered_abuts(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_abutted
            else:
github nanoporetech / medaka / medaka / common.py View on Github external
def from_samples(samples):
        """Create a sample by concatenating an iterable of `Sample` objects.

        :param samples: iterable of `Sample` objects.
        :returns: `Sample` obj
        """
        samples = list(samples)
        for s1, s2 in zip(samples[0:-1], samples[1:]):
            rel = Sample.relative_position(s1, s2)
            if rel is not Relationship.forward_abutted:
                msg = (
                    'Refusing to concatenate unordered/non-abutting '
                    'samples {} and {} with relationship {}.')
                raise ValueError(msg.format(s1.name, s2.name, repr(rel)))

        # Relationship.forward_abutted guarantees all samples have the
        # same ref_name
        non_concat_fields = {'ref_name'}

        def concat_attr(attr):
            vals = [getattr(s, attr) for s in samples]
            if attr not in non_concat_fields:
                all_none = all([v is None for v in vals])
                c = np.concatenate(vals) if not all_none else None
            else:
                assert len(set(vals)) == 1
github nanoporetech / medaka / medaka / common.py View on Github external
if is_ordered:
                return Relationship.forward_abutted
            else:
                return Relationship.reverse_abutted

        # do samples overlap?
        elif ordered_overlaps(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_overlap
            else:
                return Relationship.reverse_overlap

        # if we got this far there should be a gap between s1_ord and s2_ord
        elif ordered_gapped(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_gapped
            else:
                return Relationship.reverse_gapped

        else:
            raise RuntimeError(
                'Could not calculate relative position of {} and {}'.format(
                    s1.name, s2.name))
github nanoporetech / medaka / medaka / common.py View on Github external
s1_ord, s2_ord = sorted((s1, s2), key=lambda x: (x.first_pos, -x.size))
        is_ordered = s1_ord.name == s1.name

        # is one sample within the other?
        if ordered_contained(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.s2_within_s1
            else:
                return Relationship.s1_within_s2

        # do samples abut?
        elif ordered_abuts(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_abutted
            else:
                return Relationship.reverse_abutted

        # do samples overlap?
        elif ordered_overlaps(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_overlap
            else:
                return Relationship.reverse_overlap

        # if we got this far there should be a gap between s1_ord and s2_ord
        elif ordered_gapped(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_gapped
            else:
                return Relationship.reverse_gapped

        else:
github nanoporetech / medaka / medaka / common.py View on Github external
else:
                return Relationship.reverse_abutted

        # do samples overlap?
        elif ordered_overlaps(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_overlap
            else:
                return Relationship.reverse_overlap

        # if we got this far there should be a gap between s1_ord and s2_ord
        elif ordered_gapped(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_gapped
            else:
                return Relationship.reverse_gapped

        else:
            raise RuntimeError(
                'Could not calculate relative position of {} and {}'.format(
                    s1.name, s2.name))
github nanoporetech / medaka / medaka / variant.py View on Github external
s2_name = 'Unknown' if s2 is None else s2.name

        is_last_in_contig = False
        # s1 is last chunk
        if s2 is None:
            # go to end of s1
            end_1 = None
            is_last_in_contig = True
        else:
            rel = medaka.common.Sample.relative_position(s1, s2)
            # skip s2 if it is contained within s1
            if rel is medaka.common.Relationship.s2_within_s1:
                logger.info('{} is contained within {}, skipping.'.format(
                    s2_name, s1_name))
                continue
            elif rel is medaka.common.Relationship.forward_overlap:
                end_1, start_2, _ = medaka.common.Sample.overlap_indices(
                    s1, s2)
            elif rel is medaka.common.Relationship.forward_gapped:
                is_last_in_contig = True
                end_1, start_2 = (None, None)
                msg = '{} and {} cannot be concatenated as there is ' + \
                      'no overlap and they do not abut.'
                logger.info(msg.format(s1_name, s2_name))
            else:
                raise RuntimeError(
                    'Unexpected sample relationship {} '
                    'between {} and {}'.format(repr(rel), s1.name, s2.name))

        yield s1.slice(slice(start_1, end_1)), is_last_in_contig

        s1 = s2
github nanoporetech / medaka / medaka / common.py View on Github external
gapped = True
            elif (s2_start_maj == s1_end_maj and
                    s2_start_min > s1_end_min + 1):  # missing minors
                gapped = True
            return gapped

        if s1.ref_name != s2.ref_name:  # different ref_names
            return Relationship.different_ref_name

        s1_ord, s2_ord = sorted((s1, s2), key=lambda x: (x.first_pos, -x.size))
        is_ordered = s1_ord.name == s1.name

        # is one sample within the other?
        if ordered_contained(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.s2_within_s1
            else:
                return Relationship.s1_within_s2

        # do samples abut?
        elif ordered_abuts(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_abutted
            else:
                return Relationship.reverse_abutted

        # do samples overlap?
        elif ordered_overlaps(s1_ord, s2_ord):
            if is_ordered:
                return Relationship.forward_overlap
            else:
                return Relationship.reverse_overlap