How to use fuzzywuzzy - 10 common examples

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Xunius / MeiTingTrunk / lib / testpdfquery.py View on Github external
# compute input vars for fuzzy
        # height ratio wrt main text height
        hrii=hii/main_height
        # lowest y0
        y0ii=[ljj.y0 for ljj in gii]
        y0ii=np.min(y0ii)/page_h
        # number of words
        nwordsii=len(tii.split(' '))
        # similarity measure against a predefined list of non-title words
        notitlefmii=[fuzz.token_set_ratio(tii,jj) for jj in NON_TITLE_LIST]
        notitlefmii=np.mean(notitlefmii)

        # similarity measure between title obtained from meta data
        if doctitle:
            metatitlefmii=fuzz.ratio(tii, doctitle)
            gr_lines.append((tii,hii,y0ii,hrii,nwordsii,notitlefmii,metatitlefmii))
        else:
            gr_lines.append((tii,hii,y0ii,hrii,nwordsii,notitlefmii))

    #pprint(gr_lines)

    #----------------Do fuzzy logic----------------
    fuzz_scores=FCTitleGuess(gr_lines, doctitle)

    title_idx=np.argmax(fuzz_scores)
    title_guess=gr_lines[title_idx]
    title_y0=title_guess[2]*page_h
    title_x0=groups[title_idx][0].x0

    #----------------Guess author list----------------
    top_lines=line_dict.keys()
github loonwerks / formal-methods-workbench / models / Trusted_Build_Test / test_tb_camkes.py View on Github external
def compare_output(baseline, current):
    """Score how similar ``current`` is to ``baseline``.

    The scorer is the ``fuzz`` function whose name equals the module-level
    ``DEFAULT_ALGORITHM`` setting.  When that setting matches none of the
    supported scorer names, a message is printed and ``sys.exit(-1)`` is
    called.
    """
    supported = (
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'partial_token_sort_ratio',
        'token_set_ratio',
    )
    for scorer_name in supported:
        if DEFAULT_ALGORITHM == scorer_name:
            # Resolve the scorer lazily so only the selected one is touched.
            return getattr(fuzz, scorer_name)(baseline, current)

    print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
    sys.exit(-1)
github loonwerks / formal-methods-workbench / models / Trusted_Build_Test / test_tb_camkes.py View on Github external
def compare_output(baseline, current):
    """Score how similar ``current`` is to ``baseline``.

    The scorer is the ``fuzz`` function whose name equals the module-level
    ``DEFAULT_ALGORITHM`` setting.  When that setting matches none of the
    supported scorer names, a message is printed and ``sys.exit(-1)`` is
    called.
    """
    supported = (
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'partial_token_sort_ratio',
        'token_set_ratio',
    )
    for scorer_name in supported:
        if DEFAULT_ALGORITHM == scorer_name:
            # Resolve the scorer lazily so only the selected one is touched.
            return getattr(fuzz, scorer_name)(baseline, current)

    print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
    sys.exit(-1)
github loonwerks / formal-methods-workbench / models / Trusted_Build_Test / test_tb_camkes.py View on Github external
def compare_output(baseline, current):
    """Score how similar ``current`` is to ``baseline``.

    The scorer is the ``fuzz`` function whose name equals the module-level
    ``DEFAULT_ALGORITHM`` setting.  When that setting matches none of the
    supported scorer names, a message is printed and ``sys.exit(-1)`` is
    called.
    """
    supported = (
        'ratio',
        'partial_ratio',
        'token_sort_ratio',
        'partial_token_sort_ratio',
        'token_set_ratio',
    )
    for scorer_name in supported:
        if DEFAULT_ALGORITHM == scorer_name:
            # Resolve the scorer lazily so only the selected one is touched.
            return getattr(fuzz, scorer_name)(baseline, current)

    print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
    sys.exit(-1)
github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def test_asciionly(self):
        """Smoke-test ``utils.asciionly`` over every entry of ``self.mixed_strings``."""
        for mixed in self.mixed_strings:
            # asciionly only runs on strings; each entry goes through
            # asciidammit first and the result is handed straight on.
            utils.asciionly(utils.asciidammit(mixed))
github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def testRatioUnicodeString(self):
        """Expect ``fuzz.ratio`` to return 0 for the two fixed strings below."""
        first, second = "\u00C1", "ABCD"
        self.assertEqual(0, fuzz.ratio(first, second))
github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def test_dict_like_extract(self):
        """We should be able to use a dict-like object for choices, not only a
        dict, and still get dict-like output.

        ``process.extract`` must return at least one result, and the value of
        every ``(value, confidence, key)`` result must be one of the values
        stored in ``choices``.
        """
        try:
            # Python 2 location of UserDict.
            from UserDict import UserDict
        except ImportError:
            from collections import UserDict
        choices = UserDict({'aa': 'bb', 'a1': None})
        search = 'aaa'
        result = process.extract(search, choices)
        # Dedicated assertions report the offending values when they fail,
        # unlike assertTrue on a precomputed boolean.
        self.assertGreater(len(result), 0)
        # Snapshot the values once instead of rebuilding the view per result.
        known_values = list(choices.values())
        for value, _confidence, _key in result:
            self.assertIn(value, known_values)
github sunlightlabs / datacommons / dcapi / reconcile / tests.py View on Github external
def test_service_metadata(self):
        self.maxDiff = None
        response = self.client.get('/api/1.0/refine/reconcile', {'callback': 'jsonp123'})

        self.assertEqual(200, response.status_code)
        self.assertEqual(100,
            fuzz.token_sort_ratio(
                'jsonp123({"name": "Influence Explorer Reconciliation3", "identifierSpace": "http://staging.influenceexplorer.com/ns/entities", "schemaspace": "http://staging.influenceexplorer.com/ns/entity.object.id", "view": { "url": "http://staging.influenceexplorer.com/entity/{{id}}" }, "preview": { "url": "http://staging.influenceexplorer.com/entity/{{id}}", "width": 430, "height": 300 }, "defaultTypes": []})',
                response.content
            )
github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def testTokenSetRatio(self):
        """Pin the ``fuzz.token_set_ratio`` scores expected for the fixture string pairs."""
        score = fuzz.token_set_ratio
        expect = self.assertEqual

        expect(score(self.s4, self.s5), 100)
        expect(score(self.s8, self.s8a, full_process=False), 100)
        # s9/s9a are expected to score 100 both with and without full_process.
        expect(score(self.s9, self.s9a, full_process=True), 100)
        expect(score(self.s9, self.s9a, full_process=False), 100)
        expect(score(self.s10, self.s10a, full_process=False), 50)
github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def test_fullProcess(self):
        """Run ``utils.full_process`` on each of ``self.mixed_strings``; results are not checked."""
        for mixed in self.mixed_strings:
            utils.full_process(mixed)