How to use the edlib.align function in edlib

To help you get started, we’ve selected a few edlib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github rrwick / Bacsort / rmlst_distance_matrix.py View on Github external
if assembly_1 == assembly_2:
        distances[(assembly_1, assembly_2)] = 0.0
        return

    pattern = re.compile(r'\d+[\w=]')
    common_genes = set(gene_seqs[assembly_1]) & set(gene_seqs[assembly_2])
    if not common_genes:
        distances[(assembly_1, assembly_2)] = 1.0
        distances[(assembly_2, assembly_1)] = 1.0
        return

    alignments = []
    for gene in common_genes:
        gene_1 = gene_seqs[assembly_1][gene]
        gene_2 = gene_seqs[assembly_2][gene]
        result = edlib.align(gene_1, gene_2, 'NW', 'path')
        cigar = [(int(x[:-1]), x[-1]) for x in pattern.findall(result['cigar'])]
        alignment_length = sum(x[0] for x in cigar)
        match_count = sum(x[0] for x in cigar if x[1] == '=')
        identity = match_count / alignment_length
        alignments.append((identity, match_count, alignment_length))

    # alignments = sorted(alignments, key=lambda x: x[0])
    # discard_count = 0
    # while True:
    #     if (discard_count + 2) / len(alignments) < discard_best_worst_alignments:
    #         discard_count += 2
    #     else:
    #         break
    # discard_each_end = discard_count // 2
    # alignments = alignments[discard_count:-discard_count]
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
def parseMappingFileIllumina(input):
    fwdprimer = ''
    revprimer = ''
    samples = []
    with open(input, 'r') as inputfile:
        for line in inputfile:
            line = line.replace('\n', '')
            if line.startswith('#'):
                continue
            cols = line.split('\t')
            if not cols[0] in samples:
                samples.append(cols[0])
            match = edlib.align(cols[1], cols[2], mode="HW", k=0)
            if match["editDistance"] == 0:
                Trim = match["locations"][0][1]+1
                if fwdprimer == '':
                    fwdprimer = cols[2][Trim:]
                    revprimer = cols[3]
            else:
                fwdprimer = cols[2]
                revprimer = cols[3]
        return (samples, fwdprimer, revprimer)
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
def trimForPrimer(primer, seq, primer_mismatch):
    foralign = edlib.align(primer, seq, mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
    if foralign['editDistance'] < 0:
        return 0
    else:
        CutPos = foralign["locations"][0][1]+1
        return CutPos
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
NoPrimer += 1
                        continue
                    stringent = {}
                    stringent[BCLabel] = samples[BCLabel]['RevBarcode']
                    revBC, revBCLabel = AlignBarcode(read2[1], stringent, barcode_mismatch)
                    if revBC == '':
                        NoRevBarcode += 1
                        continue
                    #look for reverse primer in reverse read
                    revalign = edlib.align(RevPrimer, read2[1], mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                    if revalign['editDistance'] < 0: #not found
                        NoRevPrimer += 1
                        continue
                else:
                    #look for forward primer
                    foralign = edlib.align(fwdprimer, read1[1], mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                    if foralign['editDistance'] < 0: #not found
                        NoPrimer += 1
                        continue
                    if len(revbarcodes) > 0:
                        #look for valid revbarcodes
                        if len(revbarcodes) > 0:
                            revBC, revBCLabel = AlignBarcode(read2[1], revbarcodes, barcode_mismatch)
                            if revBC == '':
                                NoRevBarcode += 1
                                continue
                    #look for reverse primer in reverse read
                    revalign = edlib.align(revprimer, read2[1], mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                    if revalign['editDistance'] < 0: #not found
                        NoRevPrimer += 1
                        continue
                #if get here, then all is well, construct new header and trim reads
github iprada / Circle-Map / circlemap / utils.py View on Github external
nuc_and_ops =  np.array([1,2,3,4,5,6,7])
    encoded_nucs = number_encoding(soft_clipped_read['seq'])

    hits = 0

    min_score = len(soft_clipped_read['seq'])


    top_hits = {}


    if read.is_reverse:

        while hits < n_hits and min_score >= -10:

            alignment = edlib.align(soft_clipped_read['seq'], minus_strand, mode='HW', task='path')
            if hits ==0:
                if alignment['editDistance'] > max_edit:
                    return(None)



            for location in alignment['locations']:



                mask_bases = 'X' * ( location[1] - location[0])


                minus_strand = minus_strand[:location[0]] + mask_bases + minus_strand[location[1]:]

                hits += 1
github Genomon-Project / GenomonSV / genomon_sv / edlibFunction.py View on Github external
# [2]=ref1,[3]=ref1_rev,
                # [4]=ref2,[5]=ref2_rev,
                # [6]=ref, [7]=ref_rev
                edlib_ret = [100,100,100,100,100,100,100,100]

                # alt
                ret = edlib.align(line, fa_alt, mode="HW", task="path")
                edlib_ret[0] = ret["editDistance"]
                ret = edlib.align(rc_line, fa_alt, mode="HW", task="path")
                edlib_ret[1] = ret["editDistance"]

                if fa_ref == "":
                    # ref1
                    ret = edlib.align(line, fa_ref1, mode="HW", task="path")
                    edlib_ret[2] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref1, mode="HW", task="path")
                    edlib_ret[3] = ret["editDistance"]
                    # ref2
                    ret = edlib.align(line, fa_ref2, mode="HW", task="path")
                    edlib_ret[4] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref2, mode="HW", task="path")
                    edlib_ret[5] = ret["editDistance"]
                else:
                    # ref
                    ret = edlib.align(line, fa_ref, mode="HW", task="path")
                    edlib_ret[6] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref, mode="HW", task="path")
                    edlib_ret[7] = ret["editDistance"]

                if read_pair == 1:
                    d_edlib_read1[name] = edlib_ret
                else:
github Genomon-Project / GenomonSV / genomon_sv / edlibFunction.py View on Github external
edlib_ret[1] = ret["editDistance"]

                if fa_ref == "":
                    # ref1
                    ret = edlib.align(line, fa_ref1, mode="HW", task="path")
                    edlib_ret[2] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref1, mode="HW", task="path")
                    edlib_ret[3] = ret["editDistance"]
                    # ref2
                    ret = edlib.align(line, fa_ref2, mode="HW", task="path")
                    edlib_ret[4] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref2, mode="HW", task="path")
                    edlib_ret[5] = ret["editDistance"]
                else:
                    # ref
                    ret = edlib.align(line, fa_ref, mode="HW", task="path")
                    edlib_ret[6] = ret["editDistance"]
                    ret = edlib.align(rc_line, fa_ref, mode="HW", task="path")
                    edlib_ret[7] = ret["editDistance"]

                if read_pair == 1:
                    d_edlib_read1[name] = edlib_ret
                else:
                    d_edlib_read2[name] = edlib_ret

    numRef = 0 
    numAlt = 0
    numOther = 0
    for name in d_edlib_read1.keys():
        ret1 = d_edlib_read1[name]
        ret2 = d_edlib_read2[name]
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
multihits = 0
    findForPrimer = 0
    findRevPrimer = 0
    with open(outR1, 'w') as outfile1:
        with open(outR2, 'w') as outfile2:
            for read1, read2 in zip(file1, file2):
                Total += 1
                ffp = False
                frp = False
                R1Seq = read1[1][:RL]
                R1Qual = read1[2][:RL]
                R2Seq = read2[1][:RL]
                R2Qual = read2[2][:RL]
                ForTrim, RevTrim = (0,)*2
                #look for forward primer in forward read
                R1foralign = edlib.align(fwdprimer, R1Seq, mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                if R1foralign['editDistance'] < 0:
                    if require_primer == 'on' or full_length: #not found
                        continue
                else:
                    if len(R1foralign['locations']) > 1: #multiple hits
                        multihits += 1
                        continue
                    try:
                        ForTrim = R1foralign["locations"][0][1]+1
                        findForPrimer += 1
                        ffp = True
                    except IndexError:
                        pass
                R1revalign = edlib.align(RevComp(revprimer), R1Seq, mode="HW", k=primer_mismatch, additionalEqualities=degenNuc)
                if R1revalign['editDistance'] < 0:
                    R1RevCut = RL
github nextgenusfs / amptk / amptk / process_ont.py View on Github external
def findFwdPrimer(primer, sequence, mismatch, equalities):
    # search for match
    align = edlib.align(primer, sequence, mode="HW",
                        k=int(mismatch), additionalEqualities=equalities)
    if align["editDistance"] >= 0:  # we found a hit
        TrimPos = align["locations"][0][1]+1
        return(TrimPos, align['editDistance'])
    else:  # return position will be False if not found
        return (False, False)
github nextgenusfs / amptk / amptk / amptklib.py View on Github external
def findRevPrimer(primer, sequence, mismatch, equalities):
    #trim position
    TrimPos = None
    #search for match
    align = edlib.align(primer, sequence, mode="HW", task="locations", k=int(mismatch), additionalEqualities=equalities)
    if align["editDistance"] >= 0: #we found a hit
        TrimPos = align["locations"][0][0]
    #return position will be None if not found
    return TrimPos

edlib

Lightweight, super fast library for sequence alignment using edit (Levenshtein) distance.

MIT
Latest version published 3 years ago

Package Health Score

55 / 100
Full package analysis

Popular edlib functions

Similar packages