How to use the uszipcode.pkg.fuzzywuzzy.utils module in uszipcode

To help you get started, we’ve selected a few uszipcode examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
def _process_and_sort(s, force_ascii, full_process=True):
    """Return a cleaned string with token sorted."""
    # pull tokens
    ts = utils.full_process(s, force_ascii=force_ascii) if full_process else s
    tokens = ts.split()

    # sort tokens and join
    sorted_string = u" ".join(sorted(tokens))
    return sorted_string.strip()
github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
"""Find all alphanumeric tokens in each string...
        - treat them as a set
        - construct two strings of the form:
            <sorted_intersection><sorted_remainder>
        - take ratios of those two strings
        - controls for unordered partial matches"""

    if not full_process and s1 == s2:
        return 100

    p1 = utils.full_process(
        s1, force_ascii=force_ascii) if full_process else s1
    p2 = utils.full_process(
        s2, force_ascii=force_ascii) if full_process else s2

    if not utils.validate_string(p1):
        return 0
    if not utils.validate_string(p2):
        return 0

    # pull tokens
    tokens1 = set(p1.split())
    tokens2 = set(p2.split())

    intersection = tokens1.intersection(tokens2)
    diff1to2 = tokens1.difference(tokens2)
    diff2to1 = tokens2.difference(tokens1)

    sorted_sect = " ".join(sorted(intersection))
    sorted_1to2 = " ".join(sorted(diff1to2))
    sorted_2to1 = " ".join(sorted(diff2to1))
github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
def ratio(s1, s2):
    """Return a 0-100 scaled similarity score for ``s1`` and ``s2``.

    Both inputs are passed through ``utils.make_type_consistent`` first.
    The ``SequenceMatcher`` ratio of the results is multiplied by 100 and
    handed to ``utils.intr`` to produce the returned value.
    """
    left, right = utils.make_type_consistent(s1, s2)
    matcher = SequenceMatcher(None, left, right)
    similarity = matcher.ratio()
    return utils.intr(100 * similarity)
github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
if len_ratio > 8:
        partial_scale = .6

    if try_partial:
        partial = partial_ratio(p1, p2) * partial_scale
        ptsor = partial_token_sort_ratio(p1, p2, full_process=False) \
            * unbase_scale * partial_scale
        ptser = partial_token_set_ratio(p1, p2, full_process=False) \
            * unbase_scale * partial_scale

        return utils.intr(max(base, partial, ptsor, ptser))
    else:
        tsor = token_sort_ratio(p1, p2, full_process=False) * unbase_scale
        tser = token_set_ratio(p1, p2, full_process=False) * unbase_scale

        return utils.intr(max(base, tsor, tser))
github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
def partial_ratio(s1, s2):
    """Return the ratio of the most similar substring
    as a number between 0 and 100.

    NOTE(review): only the opening part of this function is visible in this
    excerpt; the step that scores ``blocks`` is not shown here.
    """
    # utils.make_type_consistent is defined elsewhere; presumably it coerces
    # both arguments to the same string type -- confirm against utils.
    s1, s2 = utils.make_type_consistent(s1, s2)

    # Order the inputs by length; on a tie s1 is treated as the shorter one.
    if len(s1) <= len(s2):
        shorter = s1
        longer = s2
    else:
        shorter = s2
        longer = s1

    # Matching blocks are computed with the shorter string as the first
    # sequence and the longer string as the second.
    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # each block represents a sequence of matching characters in a string
    # of the form (idx_1, idx_2, len)
    # the best partial match will block align with at least one of those blocks
    #   e.g. shorter = "abcd", longer = XXXbcdeEEE
    #   block = (1,3,3)
github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
def ratio(s1, s2):
    """Score how alike ``s1`` and ``s2`` are on a 0-100 scale.

    The arguments are first run through ``utils.make_type_consistent``;
    the ``SequenceMatcher`` ratio of the results is then scaled by 100
    and passed to ``utils.intr``.
    """
    first, second = utils.make_type_consistent(s1, s2)
    raw_score = SequenceMatcher(None, first, second).ratio()
    scaled_score = 100 * raw_score
    return utils.intr(scaled_score)
github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
@utils.check_empty_string
def partial_ratio(s1, s2):
    """Return the ratio of the most similar substring
    as a number between 0 and 100.

    NOTE(review): only the opening part of this function is visible in this
    excerpt; the step that scores ``blocks`` is not shown here.
    """
    # utils.make_type_consistent is defined elsewhere; presumably it coerces
    # both arguments to the same string type -- confirm against utils.
    s1, s2 = utils.make_type_consistent(s1, s2)

    # Order the inputs by length; on a tie s1 is treated as the shorter one.
    if len(s1) <= len(s2):
        shorter = s1
        longer = s2
    else:
        shorter = s2
        longer = s1

    # Matching blocks are computed with the shorter string as the first
    # sequence and the longer string as the second.
    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # each block represents a sequence of matching characters in a string
github MacHu-GWU / uszipcode-project / uszipcode / pkg / fuzzywuzzy / fuzz.py View on Github external
@utils.check_for_equivalence
@utils.check_empty_string
def ratio(s1, s2):
    """Return a 0-100 scaled similarity score for ``s1`` and ``s2``.

    The function is wrapped by ``utils.check_empty_string`` (innermost) and
    ``utils.check_for_equivalence`` (outermost). Inside, both inputs go
    through ``utils.make_type_consistent``, and the ``SequenceMatcher``
    ratio of the results is multiplied by 100 and passed to ``utils.intr``.
    """
    left, right = utils.make_type_consistent(s1, s2)
    matcher = SequenceMatcher(None, left, right)
    percent = 100 * matcher.ratio()
    return utils.intr(percent)