How to use the medaka.common module in medaka

To help you get started, we’ve selected a few medaka examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nanoporetech / medaka / medaka / rle.py View on Github external
def rlebam(args):
    """Entry point for merging run length information for fast5s to bam."""
    logger = medaka.common.get_named_logger('BAMDecor')
    read_index = medaka.common.read_key_value_tsv(args.read_index)
    logger.info("Found {} read in index\n".format(len(read_index)))

    def _ingress():
        """Yield each stdin line together with details parsed from it.

        Lines beginning with '@' (presumably SAM header lines - confirm
        against the caller) are passed through with `None` placeholders.
        For every other line the first two tab-separated fields are taken
        as the read ID and the flag, and the read ID is looked up in
        `read_index`.

        :yields: tuple of (line with trailing whitespace removed, read ID,
            whether bit 16 of the flag is set, `read_index` entry for the
            read ID).
        """
        for record in sys.stdin:
            if record[0] == '@':
                yield record.rstrip(), None, None, None
                continue
            read_id, flag, _ = record.split('\t', 2)
            reverse = (int(flag) & 16) != 0
            fname = read_index[read_id]
            yield record.rstrip(), read_id, reverse, fname

    with concurrent.futures.ProcessPoolExecutor(
            max_workers=args.workers) as executor:
        for results in executor.map(
github nanoporetech / medaka / medaka / models.py View on Github external
"""Creation and loading of models."""

import os
import pathlib
import tempfile

import requests

import medaka.common
import medaka.datastore
import medaka.options

# Logger shared by this module, obtained under the name 'ModelLoad'.
logger = medaka.common.get_named_logger('ModelLoad')


class DownloadError(ValueError):
    """Signal that a model could not be downloaded successfully.

    Derives from `ValueError`, so handlers that catch `ValueError` will
    also catch this exception.
    """


def resolve_model(model):
    """Resolve a model filepath, downloading known models if necessary.

    :param model: str, model filepath or model ID

    :returns: str, filepath to model file.
    """
    if os.path.exists(model):  # model is path to model file
        return model
    elif model not in medaka.options.allowed_models:
github nanoporetech / medaka / medaka / prediction.py View on Github external
def run_prediction(
        output, bam, regions, model, feature_encoder,
        chunk_len, chunk_ovlp, batch_size=200,
        save_features=False, enable_chunking=True):
    """Inference worker."""
    logger = medaka.common.get_named_logger('PWorker')

    remainder_regions = list()
    loader = DataLoader(
        4 * batch_size, bam, regions, feature_encoder,
        chunk_len=chunk_len, chunk_overlap=chunk_ovlp,
        enable_chunking=enable_chunking)
    batches = medaka.common.grouper(loader, batch_size)

    total_region_mbases = sum(r.size for r in regions) / 1e6
    logger.info(
        "Running inference for {:.1f}M draft bases.".format(
            total_region_mbases))

    with medaka.datastore.DataStore(output, 'a') as ds:
        mbases_done = 0
        cache_size_log_interval = 5
github nanoporetech / medaka / medaka / labels.py View on Github external
def _group_and_trim_by_haplotype(alignments):
        """Group alignments by haplotype tag and trim to common genomic window.

        :param alignments: {haplotype: [`TruthAlignment`]}
        :returns: list of tuples where each tuple contains `TruthAlignment`
            for each haplotype trimmed to common genomic window.

        .. note:: We should avoid the situation of staggered alignments
             which could occur by independently chunking each haplotype
             by chunking the draft and aligning to both haplotypes, then
             chunking both haplotypes according to draft-chunks, then
             realigning haplotype chunks back to the draft - this should
             minimize staggering of truth alignments and hence the number
             of labels discarded.
        """
        logger = medaka.common.get_named_logger("Group_and_trim")
        haplotypes = sorted(list(alignments.keys()))
        if len(haplotypes) == 1:  # haploid
            grouped = [(a,) for a in alignments[haplotypes[0]]]
        else:
            # create interval trees for other haplotypes
            trees = {}
            for h in haplotypes[1:]:
                trees[h] = intervaltree.IntervalTree(
                    [intervaltree.Interval(a.start, a.end, a)
                        for a in alignments[h]])
            # loop over alignments in first haplotype and find overlapping
            # alignments in other haplotypes. If there are multiple overlapping
            # alignments, take the one with the longest overlap.
            grouped = []
            for a in alignments[haplotypes[0]]:
                group = [a]