How to use the fuzzywuzzy.fuzz.UWRatio function in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github seatgeek / fuzzywuzzy / test_fuzzywuzzy_hypothesis.py View on Github external
def scorers_processors():
    """
    Build the (scorer, processor) combinations exercised by the tests.

    The two basic ratio scorers are paired with every processor (identity,
    full_process without ASCII forcing, full_process with ASCII forcing);
    each remaining scorer is paired with a single full_process variant.

    :return: [(scorer, processor), ...]
    """
    def full_process(force_ascii):
        # Fresh partial per pair, mirroring one partial(...) per entry.
        return partial(utils.full_process, force_ascii=force_ascii)

    basic_scorers = (fuzz.ratio, fuzz.partial_ratio)
    basic_processors = (lambda x: x,
                        full_process(False),
                        full_process(True))

    # Scorer-major order: every processor for the first scorer, then the next.
    pairs = [(score_fn, process_fn)
             for score_fn in basic_scorers
             for process_fn in basic_processors]

    # (scorer, force_ascii) for the scorers tested with one processor only.
    fixed_pairs = (
        (fuzz.WRatio, True),
        (fuzz.QRatio, True),
        (fuzz.UWRatio, False),
        (fuzz.UQRatio, False),
        (fuzz.token_set_ratio, True),
        (fuzz.token_sort_ratio, True),
        (fuzz.partial_token_set_ratio, True),
        (fuzz.partial_token_sort_ratio, True),
    )
    for score_fn, ascii_only in fixed_pairs:
        pairs.append((score_fn, full_process(ascii_only)))

    return pairs
github Sefaria / Sefaria-Project / scripts / lexicon / adjust_rashi_laaz.py View on Github external
def find_closest_match(text_rows, word, default_compare=True, filter_words_with_quotation_marks=True):
    """
    Find the row whose text holds the best fuzzy match for ``word``.

    Each row's text (``row[1]``) is split on whitespace (and regrouped into
    n-grams via ``create_ngrams`` when ``word`` has more than one token); the
    best candidate per row is picked with ``process.extractOne`` and the
    highest-scoring row wins. If the winning candidate and ``word`` differ in
    length by a factor of two or more, the search is repeated once with the
    alternate scorer and that result is returned instead.

    :param text_rows: rows to search; ``row[0]`` is passed through to the
        result and ``row[1]`` is the text that gets split into candidates.
        It is iterated again on the fallback pass.
    :param word: headword to look for; double quotes are removed before
        matching
    :param default_compare: score with ``fuzz.UWRatio`` when true, otherwise
        with ``fuzz.UQRatio``
    :param filter_words_with_quotation_marks: when true, only candidates that
        contain a double quote are considered
    :return: ``(row[0], row[1], matched_word, score)`` for the best row, or
        ``None`` when no row yielded any candidate
    """
    #Todo: if the headword is more than one word, must look through ngrams instead of just split single words.
    headword_size = len(word.split())
    query = word.replace('"', '')  # loop-invariant, so computed once
    scorer = fuzz.UWRatio if default_compare else fuzz.UQRatio

    results = []
    for row in text_rows:
        text_words = row[1].split()
        if headword_size > 1:
            text_words = create_ngrams(text_words, headword_size)
        if filter_words_with_quotation_marks:
            text_words = [w for w in text_words if '"' in w]
        if not text_words:
            continue
        matched_word, score = process.extractOne(query, text_words, processor=laaz_process, scorer=scorer)
        results.append((row[0], row[1], matched_word, score))

    if not results:
        return None

    # max() returns the first row with the top score, which is the same row a
    # stable descending sort would have placed first.
    top_res = max(results, key=lambda res: res[-1])

    # if strings are not similar enough in length, use a different comparison
    if default_compare and word:
        matched_len = len(top_res[2])
        longer = max(matched_len, len(word))
        shorter = min(matched_len, len(word))
        # Integer form of longer / shorter >= 2; it cannot divide by zero when
        # the matched candidate is empty.
        if longer >= 2 * shorter:
            # Forward the caller's filter flag so the fallback searches the
            # same candidate set as the first pass.
            return find_closest_match(
                text_rows,
                word,
                default_compare=False,
                filter_words_with_quotation_marks=filter_words_with_quotation_marks,
            )
    return top_res
github ubisoftinc / vulnmine / vulnmine / matchven.py View on Github external
'ven_cln': t_cpeVen,
                        'fz_ratio': fz.ratio(
                                t_cpeVen,
                                t_arPub0),
                        'fz_ptl_ratio': fz.partial_ratio(
                                t_cpeVen,
                                t_arPub0),
                        'fz_tok_set_ratio': fz.token_set_ratio(
                                t_cpeVen,
                                t_arPub0,
                                force_ascii=False),
                        'fz_ptl_tok_sort_ratio': fz.partial_token_sort_ratio(
                                    t_cpeVen,
                                    t_arPub0,
                                    force_ascii=False),
                        'fz_uwratio': fz.UWRatio(
                                t_cpeVen,
                                t_arPub0)
                        })
                    mycount = mycount + 1
                    if mycount % 1000 == 0:
                        self.logger.debug(
                                    '# entries produced: {0}\n'.format(
                                                mycount
                                                )
                                     )

                #     # debug code to shorten loop for testing
                #     if mycount > 1000:
                #         break

                # # debug code to speed thru loops
github Pinafore / qb / ingestion / title_finder.py View on Github external
def scorer(left, right):
    """Score ``left`` against ``right`` with UWRatio, zeroing out list/meta titles.

    Titles that start with "list of" or "wikiproject", or end with " topics",
    always score 0; everything else is scored by ``UWRatio(left, right)``.
    """
    excluded_prefixes = ("list of", "wikiproject")
    if right.startswith(excluded_prefixes) or right.endswith(" topics"):
        return 0
    return UWRatio(left, right)
github ubisoftinc / vulnmine / vulnmine / matchsft.py View on Github external
'fz_ratio': fz.ratio(
                                        t_cpe_titleX_tmp,
                                        t_ar_dsply0_tmp
                                        ),
                        'fz_ptl_ratio': fz.partial_ratio(
                                                t_cpe_titleX_tmp,
                                                t_ar_dsply0_tmp
                                                ),
                        'fz_tok_set_ratio': fz_ptl_tok_set_ratio,
                        'fz_ptl_tok_sort_ratio': fz.token_sort_ratio(
                                                    t_cpe_titleX_tmp,
                                                    t_ar_dsply0_tmp,
                                                    force_ascii=False
                                                    ),
                        'fz_uwratio': fz.UWRatio(
                                        t_cpe_titleX_tmp,
                                        t_ar_dsply0_tmp
                                        ),

                        'fz_rel_ratio': fz_rel_ratio,
                        'fz_rel_ptl_ratio': fz_rel_ptl_ratio,
                        't_cve_name': t_cve_name
                        })
                    m = m+1

                n = n+1
                if n % 100 < 1:
                    self.logger.debug(
                            '---Working ar: '
                            'sccm sft i/p: {0} '
                            ', potential matches output: {1}\n'.format(n, m)