How to use the rapidfuzz.process function in RapidFuzz

To help you get started, we’ve selected a few RapidFuzz examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hugochan / BAMnet / src / core / utils / generic_utils.py View on Github external
def extract_dep_feature(dep_parser, text, topic_ent, question_word):
    """Combine the dependency paths of the question word and the topic entity.

    The text is parsed with ``dep_parser``, each non-stop-word token of
    ``topic_ent`` is fuzzy-matched against the whitespace-split words of
    ``text``, and the shortest non-empty ``find_parent`` result among the
    sufficiently similar matches is kept.

    Args:
        dep_parser: Object whose ``raw_parse(text)`` yields parses exposing
            ``triples()``.
        text: The sentence to parse.
        topic_ent: Topic entity string; tokenized with ``tokenize``.
        question_word: Word passed to ``find_parent`` for the question side.

    Returns:
        ``find_parent(question_word, tree)`` followed by the reversed topic
        path with its last element dropped (presumably the shared root --
        TODO confirm against ``find_parent``).
    """
    dep = next(dep_parser.raw_parse(text))
    tree = list(dep.triples())
    topic_tokens = list(set(tokenize(topic_ent)) - stop_words)
    words = text.split()

    # Infinity instead of a magic upper bound: any real path is shorter.
    shortest = float("inf")
    topic_ent_to_root = []
    for token in topic_tokens:
        ret = process.extractOne(token, words, scorer=fuzz.token_sort_ratio)
        # extractOne returns None when there is nothing to match against
        # (e.g. empty text); treat that like a below-threshold match.
        if ret is None or ret[1] < 85:
            continue
        path = find_parent(ret[0], tree, '->')
        if 0 < len(path) < shortest:
            topic_ent_to_root = path
            shortest = len(path)
    question_word_to_root = find_parent(question_word, tree)
    return question_word_to_root + list(reversed(topic_ent_to_root[:-1]))
github Jelomite / horrible-downloader / HorribleDownloader / parser.py View on Github external
def _get_show_id(self, show: str) -> int:
        """Look up the numeric id of the show that best matches ``show``.

        The name is fuzzy-matched against the keys of ``self.shows``; the
        matching value is appended to the shows URL, the page is fetched, and
        the id is read from its ``var hs_showid = <digits>`` statement.

        Returns:
            The show id, or ``0`` when no show matches or the fetched page
            does not contain an id.
        """
        # Names may arrive with an HTML-escaped ampersand; normalise it so
        # the fuzzy match compares like with like.
        show = show.replace('&amp;', '&')
        try:
            best = fuzzy_match.extractOne(show, self.shows.keys())
        except IndexError:
            # Kept from the original handling of a failed lookup.
            return 0
        if best is None:
            # No candidate at all (e.g. ``self.shows`` is empty).
            return 0
        key = best[0]
        url = "https://horriblesubs.info/shows/" + self.shows[key]
        response = requests.get(url)
        # Capture the digits directly instead of stripping a character set.
        found = re.search(r"var hs_showid = (\d+)", response.text)
        if found is None:
            return 0
        return int(found.group(1))
github ssine / pptx2md / pptx2md / parser.py View on Github external
def process_title(shape):
    """Emit the title held by ``shape`` through ``out.put_title``.

    When ``g.use_custom_title`` is false the stripped text is emitted with
    level ``1``. Otherwise the text is fuzzy-matched (cutoff 92) against the
    keys of ``g.titles``: a match is replaced by the matched key and emitted
    with that key's value from ``g.titles``; a miss is emitted unchanged with
    ``g.max_custom_title + 1``.
    """
    text = shape.text_frame.text.strip()
    if not g.use_custom_title:
        out.put_title(text, 1)
        return

    res = fuze_process.extractOne(text, g.titles.keys(), score_cutoff=92)
    if not res:
        # No custom title is close enough: keep the original text.
        out.put_title(text, g.max_custom_title + 1)
        return

    matched, score = res[0], res[1]
    print(text, ' transferred to ', matched, '. the ratio is ', round(score))
    out.put_title(matched, g.titles[matched])
github Jonarzz / DotaResponsesRedditBot / parsers / css_parser.py View on Github external
Uses rapidfuzz for fuzzy matching of hero names to name found in `.flair-name` property in css.
    """
    hero_names = db_api.get_all_hero_names()

    response = requests.get(STYLESHEET_URL, headers={'User-Agent': USER_AGENT})
    r = json.loads(response.text)
    stylesheet = r['data']['stylesheet']

    r = re.compile(FLAIR_REGEX)
    for flair in r.finditer(stylesheet):
        flair_css = flair['css_class']
        img_path = flair['img_path']
        flair_hero = img_path[6:]

        match, confidence = process.extractOne(flair_hero, hero_names)
        if confidence >= 90:
            db_api.update_hero(hero_name=match, img_path=img_path, flair_css=flair_css)
github hugochan / BAMnet / src / core / utils / freebase_utils.py View on Github external
def query_kb(kb, ent_name, fuzz_threshold=90):
    """Return the keys of ``kb`` whose names fuzzy-match ``ent_name``.

    For every entry, ``ent_name`` is compared (token-sort ratio) against the
    concatenation ``v['name'] + v['alias']`` -- presumably two lists of
    strings; verify against the knowledge-base loader. Entries whose best
    score is strictly greater than ``fuzz_threshold`` are kept.

    Args:
        kb: Mapping from key to a record with ``'name'`` and ``'alias'``.
        ent_name: Entity name to look for.
        fuzz_threshold: Exclusive lower bound on the match score.

    Returns:
        A tuple of matching keys ordered by descending score, or an empty
        list when nothing matches.
    """
    results = []
    for k, v in kb.items():
        ret = process.extractOne(ent_name, v['name'] + v['alias'], scorer=fuzz.token_sort_ratio)
        # extractOne returns None when the entry has no names to compare.
        if ret is not None and ret[1] > fuzz_threshold:
            results.append((k, ret[0], ret[1]))
    if not results:
        return []
    # Stable sort: equally scored entries keep their kb iteration order.
    results.sort(key=lambda hit: hit[-1], reverse=True)
    return tuple(hit[0] for hit in results)