How to use the fuzzywuzzy.fuzz.partial_token_set_ratio function in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def testPartialTokenSetRatio(self):
    """Check that the partial token-set ratio of fixtures s4 and s7 is 100."""
    score = fuzz.partial_token_set_ratio(self.s4, self.s7)
    self.assertEqual(score, 100)
github seatgeek / fuzzywuzzy / fuzzywuzzy / process.py View on Github external
# perform a noop as it still needs to be a function
    if processor is None:
        processor = no_process

    # Run the processor on the input query.
    processed_query = processor(query)

    if len(processed_query) == 0:
        logging.warning(u"Applied processor reduces input query to empty string, "
                        "all comparisons will have score 0. "
                        "[Query: \'{0}\']".format(query))

    # Don't run full_process twice
    if scorer in [fuzz.WRatio, fuzz.QRatio,
                  fuzz.token_set_ratio, fuzz.token_sort_ratio,
                  fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio,
                  fuzz.UWRatio, fuzz.UQRatio] \
            and processor == utils.full_process:
        processor = no_process

    # Only process the query once instead of for every choice
    if scorer in [fuzz.UWRatio, fuzz.UQRatio]:
        pre_processor = partial(utils.full_process, force_ascii=False)
        scorer = partial(scorer, full_process=False)
    elif scorer in [fuzz.WRatio, fuzz.QRatio,
                    fuzz.token_set_ratio, fuzz.token_sort_ratio,
                    fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio]:
        pre_processor = partial(utils.full_process, force_ascii=True)
        scorer = partial(scorer, full_process=False)
    else:
        pre_processor = no_process
    processed_query = pre_processor(processed_query)
github ubisoftinc / vulnmine / vulnmine / matchsft.py View on Github external
t_cpe_relX_tmp,
                                                t_ar_ver0
                                                )

                    if (t_cpe_relX_tmp != '-') and (t_ar_ver0 != '-'):

                        # If release data is specified, then check that
                        # there is at least a partial match

                        if fz_rel_ratio < 90 or fz_rel_ptl_ratio < 100:
                            continue

                    # 2) There should be at least one occurrence of one word in
                    # the cpe full name somewhere in sccm full name

                    fz_ptl_tok_set_ratio = fz.partial_token_set_ratio(
                                                t_cpe_titleX_tmp,
                                                t_ar_dsply0_tmp,
                                                force_ascii=False
                                                )

                    if fz_ptl_tok_set_ratio < 70:
                        continue

                    ######
                    # calculate fuzzy matching statistics for this match
                    ######

                    lst_dict.append({
                        'vendor_X': t_cpe_vdr_X,
                        'software_X': t_cpe_sft_X,
                        'Version0': t_ar_ver0,
github abhishekkrthakur / is_that_a_duplicate_quora_question / feature_engineering.py View on Github external
# Row-wise (axis=1) partial token-set score of the two question columns;
# both values are passed through str() before scoring.
data['fuzz_partial_token_set_ratio'] = data.apply(
    lambda row: fuzz.partial_token_set_ratio(
        str(row['question1']), str(row['question2'])
    ),
    axis=1,
)
# Same pairing, scored with the partial token-sort ratio instead.
data['fuzz_partial_token_sort_ratio'] = data.apply(
    lambda row: fuzz.partial_token_sort_ratio(
        str(row['question1']), str(row['question2'])
    ),
    axis=1,
)
github ubisoftinc / vulnmine / vulnmine / matchven.py View on Github external
t_arPub0
                        ) in df_arPub.itertuples():

                    # quick heuristics:
                    #   a) 1st word of cpe Vendor string has to be in the
                    #            tokenized wmi Publisher0 string somewhere
                    #   b) condensed cpe name has to be shorter than the full
                    #           WMI 'Publisher0' name

                    if len(t_cpeVen) > len(t_arPub0):
                        # self.logger.debug('arPub0 too short - continuing'
                        continue

                    # Look for at least one occurrence of one word in cpeVen
                    #       somewhere in arPub
                    if fz.partial_token_set_ratio(
                                            t_cpeVen,
                                            t_arPub0,
                                            force_ascii=False
                                            ) < 100:
                        continue

                    # Calculate fuzzy matching statistics as "features" for
                    # the subsequent ML classification

                    lst_dict.append({
                        'publisher0': t_arPub0_orig,
                        'pub0_cln': t_arPub0,
                        'vendor_X': t_cpeVen_orig,
                        'ven_cln': t_cpeVen,
                        'fz_ratio': fz.ratio(
                                t_cpeVen,
github AlexYangLi / NLI_Keras / utils / features.py View on Github external
def fuzzy(s1, s2):
    """Return eight fuzz scores for ``s1`` versus ``s2``, each divided by 100.

    The list order is: ratio, partial_ratio, token_sort_ratio,
    partial_token_sort_ratio, token_set_ratio, partial_token_set_ratio,
    QRatio, WRatio.
    """
    # Scorers are listed in output order; each is called as scorer(s1, s2).
    scorers = (
        fuzz.ratio,
        fuzz.partial_ratio,
        fuzz.token_sort_ratio,
        fuzz.partial_token_sort_ratio,
        fuzz.token_set_ratio,
        fuzz.partial_token_set_ratio,
        fuzz.QRatio,
        fuzz.WRatio,
    )
    return [scorer(s1, s2) / 100 for scorer in scorers]