How to use the fuzzywuzzy.process.extractBests function in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github seatgeek / fuzzywuzzy / test_fuzzywuzzy_hypothesis.py View on Github external
"""
    # Draw a list of random strings
    strings = data.draw(
        st.lists(st.text(min_size=10, max_size=100),
                 min_size=1, max_size=50))
    # Draw a random integer for the index in that list
    choiceidx = data.draw(st.integers(min_value=0, max_value=(len(strings) - 1)))

    # Extract our choice from the list
    choice = strings[choiceidx]

    # Check process doesn't make our choice the empty string
    assume(processor(choice) != '')

    # Extract all perfect matches
    result = process.extractBests(choice,
                                  strings,
                                  scorer=scorer,
                                  processor=processor,
                                  score_cutoff=100,
                                  limit=None)

    # Check we get a result
    assert result != []

    # Check THE ONLY result(s) we get are a perfect match for the (processed) original data
    pchoice = processor(choice)
    for r in result:
        assert pchoice == processor(r[0])
github seatgeek / fuzzywuzzy / test_fuzzywuzzy_hypothesis.py View on Github external
"""
    # Draw a list of random strings
    strings = data.draw(
        st.lists(st.text(min_size=10, max_size=100),
                 min_size=1, max_size=50))
    # Draw a random integer for the index in that list
    choiceidx = data.draw(st.integers(min_value=0, max_value=(len(strings) - 1)))

    # Extract our choice from the list
    choice = strings[choiceidx]

    # Check process doesn't make our choice the empty string
    assume(processor(choice) != '')

    # Extract all perfect matches
    result = process.extractBests(choice,
                                  strings,
                                  scorer=scorer,
                                  processor=processor,
                                  score_cutoff=100,
                                  limit=None)

    # Check we get a result
    assert result != []

    # Check the original is in the list
    assert (choice, 100) in result
github techwizrd / tarpon / src / tarpon_app / gtk / components.py View on Github external
def search_docsets(self, widget):
        """Refresh the stored search results from the widget's text.

        The widget's text is read and stripped of surrounding whitespace.
        A non-empty query is handed to ``search`` together with the
        application's ``choices``, using each choice's ``name`` attribute
        as the processed value; an empty query clears the stored results
        (sets them to ``None``). In both cases the sidebar filter is asked
        to refilter afterwards.

        Args:
            widget: Object exposing ``get_text()`` that returns the raw
                query string.
        """
        # TODO: We should move this off of the main thread for performance
        text = widget.get_text().strip()
        if not text:
            # Nothing to search for: drop any previous results.
            self.__results = None
        else:
            self.__results = search(
                text,
                self.__application.choices,
                processor=lambda docset: docset.name,
            )
        # Always refilter so the sidebar reflects the new result state.
        self.__sidebar_filter.refilter()
github sdss / marvin / python / marvin / tools / query_utils.py View on Github external
def get_best_fuzzy(name, choices, cutoff=0, return_score=False):
    items = process.extractBests(name, choices, score_cutoff=cutoff)

    if not items:
        return None
    elif len(items) == 1:
        best = items[0]
    else:
        scores = [s[1] for s in items]
        # finds items with the same score
        morethanone = sum(np.max(scores) == scores) > 1
        if morethanone:
            # tries to find an exact string match
            exact = []
            for s in items:
                itemname = s[0].name if isinstance(s[0], QueryParameter) else s[0]
                if itemname.lower() == name.lower():
                    exact.append(s)
github alpha-beta-soup / errorgeopy / errorgeopy / address.py View on Github external
Kwargs:
            limit (int): The maximum number of match candidates to retrieve
            from fuzzywuzzy. The length of the returned array may be longer, if
            the set of addresses has identical addresses that are good matches
            for the expected address (i.e. if two geocoders resolve to the same
            string address).

        Returns:
            list. Return value is a list of tuples, where each tuple contains a
            geopy Location, and a matching score based on an extension of the
            Levenshtein distance between the expectation and the Location's
            address (a higher score is a better match). The algorithm is
            implemented by SeatGeek's fuzzywuzzy, and you can read more here:
            http://chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/
        """
        extractions = fuzzyprocess.extractBests(
            expectation, [str(a) for a in self.addresses],
            limit=limit)
        result = []
        for extraction in extractions:
            result.extend([(x, extraction[1]) for x in self.addresses
                           if str(x) == extraction[0]])
        return result
github sdss / marvin / python / marvin / utils / general / structs.py View on Github external
raise ValueError('invalid value. Must be a string.')

    if len(value) < 3:
        raise ValueError('your fuzzy search value must be at least three characters long.')

    if len(choices) == 0:
        raise ValueError('choices cannot be an empty list.')

    # If the value contains _ivar or _mask this is probably an incorrect use
    # of the fuzzy feature. We raise an error.
    if '_ivar' in value:
        raise ValueError('_ivar not allowd in search value.')
    elif '_mask' in value:
        raise ValueError('_mask not allowd in search value.')

    bests = fuzz_proc.extractBests(value, choices, scorer=scorer, score_cutoff=min_score)

    if len(bests) == 0:
        best = None
    elif len(bests) == 1:
        best = bests[0]
    else:
        if bests[0][1] == bests[1][1]:
            best = None
        else:
            best = bests[0]

    if best is None:
        raise ValueError('cannot find a good match for {0!r}. '
                         'Your input value is too ambiguous.'.format(value))

    return best if return_score else best[0]
github python-discord / bot / bot / cogs / help.py View on Github external
def _handle_not_found(self, query: str) -> None:
        """
        Raise `HelpQueryNotFound` for a query that matched no command or cog.

        The names obtained by iterating the bot's `all_commands` and `cogs` are
        fuzzy-matched against `query` using `fuzz.ratio` with a score cutoff of 90.
        The resulting (match, score) pairs are converted to a dict and attached
        to the exception as its second argument.

        This method never returns normally; it always raises.
        """
        # Candidate pool: command names first, then cog names.
        candidates = [*self._bot.all_commands, *self._bot.cogs]

        close_matches = process.extractBests(
            query,
            candidates,
            scorer=fuzz.ratio,
            score_cutoff=90,
        )

        message = f'Query "{query}" not found.'
        raise HelpQueryNotFound(message, dict(close_matches))
github m0ngr31 / kodi-voice / kodi_voice / kodi.py View on Github external
if f is not None:
            match_string = f(heard_lower, self.language)
            match_func = f.__name__
          else:
            match_string = heard_lower
            match_func = 'heard'

          match_strings.append(match_string)
          log.info('  %s -> "%s"', match_func, match_string.encode("utf-8"))
        except:
          continue

      fuzzy_results = []
      for ms in set(match_strings):
        log.info('  Trying with "%s"', ms.encode("utf-8"))
        matches = process.extractBests(ms, [d[lookingFor] for d in results], limit=limit, scorer=fuzz.UQRatio, score_cutoff=75)
        if matches:
          log.info('    Best score %d%%', matches[0][1])
          fuzzy_results += matches

      # Got a match?
      if fuzzy_results:
        winners = sorted(fuzzy_results, key=lambda x: x[1], reverse=True)
        log.info('BEST MATCH: "%s" @ %d%%', winners[0][0].encode("utf-8"), winners[0][1])
        for winner in winners:
          located.append((item for item in results if item[lookingFor] == winner[0]).next())
    else:
      log.info('BEST MATCH: "%s"', located[0][lookingFor].encode("utf-8"))

    return located[:limit]
github FoglyOgly / Meowth / meowth / utils / fuzzymatch.py View on Github external
def get_matches(word_list: list, word: str, scorer='fp_ratio', score_cutoff: int = 80, limit: int = 10):
    """Fuzzy-match ``word`` against the entries of ``word_list`` via fuzzywuzzy.

    ``scorer`` is a key selecting the scoring function: ``'ratio'`` maps to
    ``fuzz.ratio`` and ``'fp_ratio'`` to ``fp_ratio``; any other key raises
    ``KeyError``. Candidates are preprocessed with ``pre`` and filtered by
    ``score_cutoff`` and ``limit`` inside ``process.extractBests``.

    The result is narrowed to the strongest tier available: matches scoring
    at least 95 if there are any, otherwise those scoring at least 90,
    otherwise everything ``extractBests`` returned.

    Returns a list of tuples with (MATCH, SCORE)
    """
    score_fn = {
        'ratio': fuzz.ratio,
        'fp_ratio': fp_ratio
    }[scorer]

    ranked = process.extractBests(
        word,
        word_list,
        processor=pre,
        scorer=score_fn,
        score_cutoff=score_cutoff,
        limit=limit,
    )

    # Prefer the highest-confidence tier that is non-empty.
    for threshold in (95, 90):
        tier = [match for match in ranked if match[1] >= threshold]
        if tier:
            return tier

    return ranked