How to use the rapidfuzz.process.extract function in RapidFuzz

To help you get started, we’ve selected a few RapidFuzz examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hugochan / BAMnet / src / core / build_data / build_data.py View on Github external
def delex_query_topic_ent(query, topic_ent, ent_types):
    """Look up the mention in ``ent_types`` that best matches ``topic_ent``.

    Args:
        query: Raw query string; it is lower-cased and passed to ``tokenize``.
        topic_ent: Topic entity identifier. Underscores are replaced by
            spaces before fuzzy matching. An empty string means "no topic".
        ent_types: Iterable of ``(mention, type)`` pairs.

    Returns:
        ``(tokenized_query, None)`` when ``topic_ent`` is empty, when there
        are no candidate mentions, or when none of the returned candidates
        has a type listed in ``config.topic_mention_types``. The code shown
        here stops after the mention lookup, so no explicit value is
        returned on the success path.
    """
    query = tokenize(query.lower())
    if topic_ent == '':
        return query, None

    # One type per mention: the first type seen wins, unless it is 'NP',
    # in which case a later type for the same mention replaces it.
    ent_type_dict = {}
    for ent, type_ in ent_types:
        if ent_type_dict.get(ent, 'NP') == 'NP':
            ent_type_dict[ent] = type_

    # No mentions at all: nothing to match against. (Indexing the result of
    # zip(*ent_types) would raise IndexError on empty input.)
    if not ent_type_dict:
        return query, None

    # The dict keys are exactly the distinct mentions.
    ret = process.extract(topic_ent.replace('_', ' '), set(ent_type_dict), scorer=fuzz.token_sort_ratio)
    if not ret:
        return query, None

    # We prefer Non-NP entity mentions
    # e.g., we prefer `uk` than `people in the uk` when matching `united_kingdom`
    topic_men = None
    topic_score = None
    # `*_` absorbs the trailing index/key element that current RapidFuzz
    # appends to each result, while still accepting plain (choice, score)
    # pairs.
    for token, score, *_ in ret:
        if ent_type_dict[token].lower() in config.topic_mention_types:
            topic_men = token
            topic_score = score
            break

    if topic_men is None:
        return query, None
github OpenJarbas / audiobooker / audiobooker / scrappers / __init__.py View on Github external
Returns:
            list : list of AudioBook objects
        """
        # priority for title matches
        alll = self.get_all_audiobooks()
        if title:
            for res in process.extract(title, alll, limit=limit):
                match, score = res
                yield match
                alll.remove(match)

        # second author matches
        if author:
            choices = [" ".join([str(a) for a in b.authors]) for b in alll]
            for res in process.extract(author, choices, limit=limit):
                match, score = res
                match = alll[choices.index(match)]
                yield match
                alll.remove(match)
github blaulan / alfred-beancount / beancount.py View on Github external
def rank(self, target, searches, limit=10):
        """Order the keys of ``searches`` by weighted fuzzy similarity to ``target``.

        Each candidate's ``fuzz.partial_ratio`` score is multiplied by
        ``math.log(searches[key] + 1)``; candidates whose raw score is not
        positive are dropped.

        Args:
            target: String to match against the keys of ``searches``.
            searches: Mapping from candidate string to a number
                (presumably a usage count -- confirm with the caller).
            limit: Maximum number of candidates requested from
                ``process.extract``.

        Returns:
            Candidate keys sorted by descending weighted score, or
            ``[target]`` when no candidate survives the filter.
        """
        candidates = process.extract(
            target, searches.keys(), limit=limit, scorer=fuzz.partial_ratio)

        weighted = []
        for hit in candidates:
            name, score = hit[0], hit[1]
            if score > 0:
                weighted.append((name, score * math.log(searches[name] + 1)))

        if not weighted:
            return [target]

        # Stable descending sort: ties keep the order extract() produced.
        weighted.sort(key=lambda pair: pair[1], reverse=True)
        return [name for name, _ in weighted]
github OpenJarbas / audiobooker / audiobooker / scrappers / __init__.py View on Github external
"""

        Args:
            since: a UNIX timestamp; returns all projects cataloged since that time
            author: all records by that author last name
            title: all matching titles
            genre: all projects of the matching genre
            limit: max entries to return (int)

        Returns:
            list : list of AudioBook objects
        """
        # priority for title matches
        alll = self.get_all_audiobooks()
        if title:
            for res in process.extract(title, alll, limit=limit):
                match, score = res
                yield match
                alll.remove(match)

        # second author matches
        if author:
            choices = [" ".join([str(a) for a in b.authors]) for b in alll]
            for res in process.extract(author, choices, limit=limit):
                match, score = res
                match = alll[choices.index(match)]
                yield match
                alll.remove(match)