How to use the jellyfish.compat._range function in jellyfish

To help you get started, we’ve selected a few jellyfish examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jamesturk / jellyfish / jellyfish / _jellyfish.py View on Github external
return 0

    min_len = max(ying_len, yang_len)
    search_range = (min_len // 2) - 1
    if search_range < 0:
        search_range = 0

    ying_flags = [False]*ying_len
    yang_flags = [False]*yang_len

    # looking only within search range, count & flag matched pairs
    common_chars = 0
    for i, ying_ch in enumerate(ying):
        low = i - search_range if i > search_range else 0
        hi = i + search_range if i + search_range < yang_len else yang_len - 1
        for j in _range(low, hi+1):
            if not yang_flags[j] and yang[j] == ying_ch:
                ying_flags[i] = yang_flags[j] = True
                common_chars += 1
                break

    # short circuit if no characters match
    if not common_chars:
        return 0

    # count transpositions
    k = trans_count = 0
    for i, ying_f in enumerate(ying_flags):
        if ying_f:
            for j in _range(k, yang_len):
                if yang_flags[j]:
                    k = j + 1
github jamesturk / jellyfish / jellyfish / _jellyfish.py View on Github external
hi = i + search_range if i + search_range < yang_len else yang_len - 1
        for j in _range(low, hi+1):
            if not yang_flags[j] and yang[j] == ying_ch:
                ying_flags[i] = yang_flags[j] = True
                common_chars += 1
                break

    # short circuit if no characters match
    if not common_chars:
        return 0

    # count transpositions
    k = trans_count = 0
    for i, ying_f in enumerate(ying_flags):
        if ying_f:
            for j in _range(k, yang_len):
                if yang_flags[j]:
                    k = j + 1
                    break
            if ying[i] != yang[j]:
                trans_count += 1
    trans_count /= 2

    # adjust for similarities in nonmatched characters
    common_chars = float(common_chars)
    weight = ((common_chars/ying_len + common_chars/yang_len +
              (common_chars-trans_count) / common_chars)) / 3

    # winkler modification: continue to boost if strings are similar
    if winklerize and weight > 0.7 and ying_len > 3 and yang_len > 3:
        # adjust for up to first 4 chars in common
        j = min(min_len, 4)
github jamesturk / jellyfish / jellyfish / porter.py View on Github external
def vowel_in_stem(self):
        """Report whether any index in 0..self.j (inclusive) is a non-consonant.

        Each index is checked with ``self.cons``; the scan stops at the first
        index for which ``self.cons`` is falsy and True is returned. If every
        index is a consonant (or the range is empty), False is returned.
        """
        last = self.j
        return any(not self.cons(pos) for pos in _range(0, last + 1))
github jamesturk / jellyfish / jellyfish / _jellyfish.py View on Github external
def damerau_levenshtein_distance(s1, s2):
    if isinstance(s1, bytes) or isinstance(s2, bytes):
        raise TypeError(_no_bytes_err)

    len1 = len(s1)
    len2 = len(s2)
    infinite = len1 + len2

    # character array
    da = defaultdict(int)

    # distance matrix
    score = [[0]*(len2+2) for x in _range(len1+2)]

    score[0][0] = infinite
    for i in _range(0, len1+1):
        score[i+1][0] = infinite
        score[i+1][1] = i
    for i in _range(0, len2+1):
        score[0][i+1] = infinite
        score[1][i+1] = i

    for i in _range(1, len1+1):
        db = 0
        for j in _range(1, len2+1):
            i1 = da[s2[j-1]]
            j1 = db
            cost = 1
            if s1[i-1] == s2[j-1]:
github jamesturk / jellyfish / jellyfish / _jellyfish.py View on Github external
if isinstance(s1, bytes) or isinstance(s2, bytes):
        raise TypeError(_no_bytes_err)

    if s1 == s2:
        return 0
    rows = len(s1)+1
    cols = len(s2)+1

    if not s1:
        return cols-1
    if not s2:
        return rows-1

    prev = None
    cur = range(cols)
    for r in _range(1, rows):
        prev, cur = cur, [r] + [0]*(cols-1)
        for c in _range(1, cols):
            deletion = prev[c] + 1
            insertion = cur[c-1] + 1
            edit = prev[c-1] + (0 if s1[r-1] == s2[c-1] else 1)
            cur[c] = min(edit, deletion, insertion)

    return cur[-1]