How to use the fuzzywuzzy.fuzz.token_set_ratio function in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github loonwerks / formal-methods-workbench / models / Trusted_Build_Test / test_tb_camkes.py View on Github external
def compare_output(baseline, current):
    """Score ``current`` against ``baseline`` with the configured fuzzy scorer.

    The module-level ``DEFAULT_ALGORITHM`` names which ``fuzz`` function is
    applied to the two arguments; that function's result is returned as-is.
    If the name is not one of the supported scorers, a message is printed
    and the process exits via ``sys.exit(-1)``.
    """
    supported_scorers = (
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'partial_token_sort_ratio',
        'token_set_ratio',
    )
    # Each supported algorithm name is also the name of the fuzz function
    # that implements it, so the lookup can be done by attribute name.
    for scorer_name in supported_scorers:
        if DEFAULT_ALGORITHM == scorer_name:
            return getattr(fuzz, scorer_name)(baseline, current)

    print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
    sys.exit(-1)
github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def testTokenSetRatio(self):
        """Check fuzz.token_set_ratio against the fixture string pairs.

        The s4/s5, s8/s8a and s9/s9a pairs are expected to score 100 (s9/s9a
        both with and without full processing); s10/s10a is expected to
        score 50 when full processing is disabled.
        """
        score = fuzz.token_set_ratio
        expect_equal = self.assertEqual

        # Default processing.
        expect_equal(score(self.s4, self.s5), 100)
        # Explicit full_process settings.
        expect_equal(score(self.s8, self.s8a, full_process=False), 100)
        expect_equal(score(self.s9, self.s9a, full_process=True), 100)
        expect_equal(score(self.s9, self.s9a, full_process=False), 100)
        expect_equal(score(self.s10, self.s10a, full_process=False), 50)
github rishooty / vrec-dat-filter / functions.py View on Github external
:return:
    """

    if dat_out is None:
        dat_out = os.path.splitext(dat_file)[0] + "clean.dat"

    # Read in dat file
    tree = ET.parse(dat_file)
    root = tree.getroot()

    # If game.get('name') does not fuzzy match source list, mark game for deletion.
    to_delete = []
    cloneof_to_keep = []
    for game in root.iter('game'):
        name = game.get('name')
        result = process.extractOne(name, roms_to_keep, score_cutoff=accuracy, scorer=fuzz.token_set_ratio)
        if not result:
            to_delete.append(game)
        else:
            parent_name = game.get('cloneof')
            if parent_name:
                cloneof_to_keep.append(parent_name)
    
    to_delete = [i for i in to_delete if i.get('name') not in cloneof_to_keep and i.get('cloneof') not in cloneof_to_keep]

    # Delete all game xml blocks that were marked for deletion and print final xml.
    del_xml_blocks(to_delete, tree, dat_out)
github Just-Some-Bots / ModTools / automod / bot.py View on Github external
highest_ratio = 0
        highest_ratio_name = ''
        if fuzz.ratio(string_one, string_two) > highest_ratio:
            highest_ratio = fuzz.ratio(string_one, string_two)
            highest_ratio_name = 'Normal Ratio'
        if fuzz.partial_ratio(string_one, string_two) > highest_ratio:
            highest_ratio = fuzz.partial_ratio(string_one, string_two)
            highest_ratio_name = 'Partial Ratio'
        elif fuzz.partial_ratio(string_one, string_two) == highest_ratio:
            highest_ratio_name += ', Partial Ratio'
        if fuzz.token_sort_ratio(string_one, string_two) > highest_ratio:
            highest_ratio = fuzz.token_sort_ratio(string_one, string_two)
            highest_ratio_name = 'Token Sort Ratio'
        elif fuzz.partial_ratio(string_one, string_two) == highest_ratio:
            highest_ratio_name += ', Token Sort Ratio'
        if fuzz.token_set_ratio(string_one, string_two) > highest_ratio:
            highest_ratio = fuzz.token_set_ratio(string_one, string_two)
            highest_ratio_name = 'Token Set Ratio'
        elif fuzz.partial_ratio(string_one, string_two) == highest_ratio:
            highest_ratio_name += ', Token Set Ratio'
        return [highest_ratio, highest_ratio_name]
github perrette / papers / papers / extract.py View on Github external
def _scholar_score(txt, bib):
    """Return the summed token_set_ratio of ``txt`` against selected ``bib`` fields.

    Only the 'title', 'author' and 'abstract' keys are considered, and only
    those actually present in ``bib`` contribute. A higher score means a
    higher similarity; the result is 0 when none of the keys is present.
    """
    from fuzzywuzzy.fuzz import token_set_ratio

    total = 0
    for field in ('title', 'author', 'abstract'):
        if field in bib:
            total += token_set_ratio(bib[field], txt)
    return total
github mynameisfiber / mrboterson / mrboterson / plugins / trivia.py View on Github external
async def _on_answer(self, conv):
        """Score unjudged conversation events against the expected answer.

        Each event in ``conv.events`` that does not yet carry a 'trivia' key
        is compared (``fuzz.token_set_ratio``) with the lower-cased
        ``conv.meta['answer']``. A score above 80 selects that event as the
        winner and stops the scan; a score above 50 sends a "Not quite..."
        message to the channel. When a winner is found, ``conv.meta['value']``
        is added to that user's entry in ``self.status[channel]['scores']``.

        NOTE(review): this excerpt may not show the whole method -- confirm
        against the full source before relying on this summary.
        """
        channel = conv.channel
        answer = conv.meta['answer'].lower()
        win_event = None
        for event in conv.events:
            # Events already marked with a 'trivia' key were handled earlier.
            if 'trivia' in event:
                continue
            score = fuzz.token_set_ratio(event['text_clean'], answer)
            if score > 80:
                # The break skips the bookkeeping at the bottom of the loop,
                # so the winning event is neither marked 'trivia' nor counted
                # in conv.meta['attempts'].
                win_event = event
                break
            elif score > 50:
                try:
                    await self.bot.send_message(channel,
                        "<@{}> Not quite...".format(event['user']))
                except KeyError as e:
                    # Best effort: a KeyError here (e.g. an event without a
                    # 'user' key) is reported but does not stop the scan.
                    import traceback
                    print("\n\nSomething went wrong in trivia")
                    traceback.print_exc()
            # Mark the event as judged and count it as an attempt.
            event['trivia'] = True
            conv.meta['attempts'] += 1
        if win_event is not None:
            user = win_event['user']
            self.status[channel]['scores'][user] += conv.meta['value']
github sunlightlabs / read_FEC / fecreader / reconciliation / fec_reconciler.py View on Github external
blocking_name = simple_clean(name)
        
    possible_matches = block_by_startswith(blocking_name, starts_with_blocklength, state, office, cycle)
        
    for match in possible_matches:
        
        name2_name = HumanName(match['cand_name'])
        name2 = simple_clean(name2_name.last) + " " + unnickname(name2_name.first)
        # calculate a buncha metrics
        text1 = name1_standardized
        text2 = name2
        #print "comparing '%s' to '%s'" % (text1, text2)
        ratio = 1/100.0*fuzz.ratio(text1, text2)
        partial_ratio = 1/100.0*fuzz.partial_ratio(text1, text2)
        token_sort_ratio = 1/100.0*fuzz.token_sort_ratio(text1, text2)
        token_set_ratio = 1/100.0*fuzz.token_set_ratio(text1, text2)
        
        avg_len = 1/2*len(text1)+len(text2)
        min_len = min(len(text1), len(text2))
        
        l_ratio = 0
        try:
            l_distance = jellyfish.levenshtein_distance(text1, text2)
            l_ratio = 1.0 - ( (0.0 + l_distance) / (0.0+avg_len) )
        except UnicodeEncodeError:
            pass
            
        long_match = longest_match(text1, text2)
        lng_ratio = (0.0 + long_match) / (0.0 + min_len)
        
        score = 0
        if ( ratio > 0.6 or partial_ratio > 0.6 or l_ratio > 0.6 or lng_ratio > 0.6):
github ricosr / retrieval_chatbot / fuzzy_match.py View on Github external
def fuzzy_matching(utterance, context_ls):
    """Score ``utterance`` against every usable entry of ``context_ls``.

    Entries equal to ``(0, 0)`` are skipped. For every other entry, element 1
    is appended (after a comma) to both the utterance and the entry's
    element 0, and the two resulting strings are compared with four
    fuzzywuzzy scorers. The per-entry result is the sum of the four scores
    divided by 4; the list of those results is passed through
    ``normalization`` and returned.
    """
    scorers = (
        fuzz.ratio,
        fuzz.partial_ratio,
        fuzz.token_sort_ratio,
        fuzz.token_set_ratio,
    )
    mean_scores = []
    for entry in context_ls:
        if entry == (0, 0):
            continue
        # Both sides share the same suffix taken from the entry itself.
        query = utterance + ',' + entry[1]
        candidate = entry[0] + ',' + entry[1]
        total = 0
        for scorer in scorers:
            total += scorer(query, candidate)
        mean_scores.append(total / 4)
    return normalization(mean_scores)
github SEED-platform / seed / seed / reconcile.py View on Github external
return 0.0

    total_match = 0.0
    # This becomes our denominator for the arithmetic mean.
    num_attrs = 0.0
    for a_attr, b_attr in attr_map:
        _trans, a_attr = _unpack_a_attr(a_attr)
        a_value = getattr(model_a, a_attr)
        b_value = getattr(model_b, b_attr)
        if not a_value or not b_value:
            continue

        num_attrs += 1.0

        # Because we want a ratio, not a percentage
        ratio = fuzz.token_set_ratio(
            unicode(a_value), unicode(b_value)
        ) / 100.0
        total_match += ratio

    return total_match / max(num_attrs, 1)
github vabraham / foodie_favorites / modeling / restaurant_pop_list.py View on Github external
def menu_count(food_sents, rest_menu):
    """Count how often each menu item is fuzzily matched by a sentence.

    Sentences of at most 150 characters are compared with
    ``fuzz.partial_ratio`` (match at a score of 75 or more); longer ones use
    ``fuzz.token_set_ratio`` (match at 68 or more). Every (sentence, item)
    match contributes one count for that item in the returned ``Counter``.
    """
    tagged_items = []
    for sentence in food_sents:
        # The scorer and its cutoff depend only on the sentence length.
        if len(sentence) <= 150:
            scorer, cutoff = fuzz.partial_ratio, 75
        else:
            scorer, cutoff = fuzz.token_set_ratio, 68
        tagged_items.extend(
            item for item in rest_menu if scorer(sentence, item) >= cutoff
        )
    return Counter(tagged_items)