How to use RapidFuzz - 10 common examples

To help you get started, we’ve selected a few RapidFuzz examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hugochan / BAMnet / src / core / utils / generic_utils.py View on Github external
def extract_dep_feature(dep_parser, text, topic_ent, question_word):
    """Build a dependency-path feature joining the question word and the topic entity.

    Args:
        dep_parser: Parser object exposing ``raw_parse(text)``, which yields
            parses that provide ``triples()``.
        text: Raw sentence; it is parsed as a whole and then split on
            whitespace to obtain candidate tokens.
        topic_ent: Topic entity string; it is tokenized and stop words are
            removed before matching against the sentence tokens.
        question_word: Token handed to ``find_parent`` to obtain the
            question-word side of the path.

    Returns:
        The ``find_parent`` result for ``question_word`` concatenated with the
        reversed shortest topic-entity path, minus that path's last element.
        When no topic token matches, only the question-word part is returned.
    """
    # Use the first parse produced by the parser.
    dep = next(dep_parser.raw_parse(text))
    tree = list(dep.triples())
    topic_ent = list(set(tokenize(topic_ent)) - stop_words)
    text = text.split()

    path_len = float('inf')
    topic_ent_to_root = []
    for each in topic_ent:
        ret = process.extractOne(each, text, scorer=fuzz.token_sort_ratio)
        # extractOne returns None when it has nothing to choose from; treat
        # that exactly like a match that is too weak instead of crashing.
        if ret is None or ret[1] < 85:
            continue
        # NOTE(review): presumably find_parent returns the chain from the
        # token up to the root of the parse -- confirm against its definition.
        tmp = find_parent(ret[0], tree, '->')
        # Keep the shortest non-empty path seen so far.
        if 0 < len(tmp) < path_len:
            topic_ent_to_root = tmp
            path_len = len(tmp)
    question_word_to_root = find_parent(question_word, tree)
    # Drop the last element of the topic path before joining -- presumably the
    # node shared with the question-word path; verify against find_parent.
    return question_word_to_root + list(reversed(topic_ent_to_root[:-1]))
github rix1337 / RSScrawler / rsscrawler / search.py View on Github external
sj_search = get_url(decode_base64("aHR0cHM6Ly9zZXJpZW5qdW5raWVzLm9yZy9zZXJpZS9zZWFyY2g/cT0=") + sj_query,
                            configfile, dbfile, scraper)
        try:
            sj_results = BeautifulSoup(sj_search, 'lxml').findAll("a", href=re.compile("/serie"))
        except:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {"payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)),
                       "title": r_title + append}
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final
github jaakkopasanen / AutoEq / measurements / manufacturer_index.py View on Github external
def search(self, name, threshold=80):
    """Find manufacturers whose name variants fuzzily match ``name``.

    Every variant of every entry in ``self.manufacturers`` is compared
    case-insensitively with ``name`` using ``fuzz.ratio``. One result is
    recorded per variant scoring strictly above ``threshold``, so the same
    manufacturer can appear more than once.

    Args:
        name: Name to look for.
        threshold: Score a variant has to exceed to count as a match.

    Returns:
        List of ``(first variant of the manufacturer, score)`` tuples,
        best score first.
    """
    hits = []
    for variants in self.manufacturers:
        scores = (fuzz.ratio(alias.lower(), name.lower()) for alias in variants)
        hits.extend((variants[0], score) for score in scores if score > threshold)
    hits.sort(key=lambda hit: hit[1], reverse=True)
    return hits
github cutright / DVH-Analytics / dvha / tools / name_prediction.py View on Github external
def get_combined_fuzz_score(self, a, b, mode='geom_mean'):
    """Score the similarity of two names from weighted full and partial ratios.

    Both names are passed through ``clean_name`` first. The ``fuzz.ratio``
    and ``fuzz.partial_ratio`` scores are multiplied by
    ``self.weight['simple']`` and ``self.weight['partial']`` respectively,
    then handed to ``self.combine_scores``.

    Args:
        a: First name.
        b: Second name.
        mode: Forwarded unchanged to ``self.combine_scores``.

    Returns:
        Whatever ``self.combine_scores`` returns for the two weighted scores.
    """
    cleaned_a = clean_name(a)
    cleaned_b = clean_name(b)

    weighted_full = float(fuzz.ratio(cleaned_a, cleaned_b) * self.weight['simple'])
    weighted_partial = float(
        fuzz.partial_ratio(cleaned_a, cleaned_b) * self.weight['partial']
    )

    return self.combine_scores(weighted_full, weighted_partial, mode=mode)
github ssine / pptx2md / pptx2md / outputter.py View on Github external
def put_title(self, text, level):
    """Write a heading unless it nearly repeats the last one at this level.

    The stripped title is compared with ``g.last_title[level]`` (empty
    string if none is stored). ``fuzz.ratio`` is called with
    ``score_cutoff=92``; a truthy result means the two are close enough to
    count as a repeat, and nothing is written.

    Args:
        text: Heading text; surrounding whitespace is removed.
        level: Heading level, i.e. the number of leading ``#`` characters.
    """
    title = text.strip()
    previous = g.last_title.get(level, '')
    if fuzz.ratio(title, previous, score_cutoff=92):
        return
    self.ofile.write('#' * level + ' ' + title + '\n\n')
    g.last_title[level] = title
github createcandle / voco / pkg / voco_adapter.py View on Github external
continue

                try:
                    #if self.DEBUG:
                        #print("")
                        #print("___" + current_thing_title)
                    probable_thing_title_confidence = 100
                    
                    if target_thing_title == None:  # If no thing title provided, we go over every thing and let the property be leading in finding a match.
                        pass
                    
                    elif target_thing_title == current_thing_title:   # If the thing title is a perfect match
                        probable_thing_title = current_thing_title
                        if self.DEBUG:
                            print("FOUND THE CORRECT THING: " + str(current_thing_title))
                    elif fuzz.ratio(str(target_thing_title), current_thing_title) > 85:  # If the title is a fuzzy match
                        if self.DEBUG:
                            print("This thing title is pretty similar, so it could be what we're looking for: " + str(current_thing_title))
                        probable_thing_title = current_thing_title
                        probable_thing_title_confidence = 85
                    elif target_space != None:
                        space_title = str(target_space) + " " + str(target_thing_title)
                        #if self.DEBUG:
                        #   print("space title = " + str(target_space) + " + " + str(target_thing_title))
                        if fuzz.ratio(space_title, current_thing_title) > 85:
                            probable_thing_title = space_title
                        
                    elif current_thing_title.startswith(target_thing_title):
                        if self.DEBUG:
                            print("partial match:" + str(len(current_thing_title) / len(target_thing_title)))
                        if len(current_thing_title) / len(target_thing_title) < 2:
                            # The strings mostly start the same, so this might be a match.
github createcandle / voco / pkg / voco_adapter.py View on Github external
pass
                    
                    elif target_thing_title == current_thing_title:   # If the thing title is a perfect match
                        probable_thing_title = current_thing_title
                        if self.DEBUG:
                            print("FOUND THE CORRECT THING: " + str(current_thing_title))
                    elif fuzz.ratio(str(target_thing_title), current_thing_title) > 85:  # If the title is a fuzzy match
                        if self.DEBUG:
                            print("This thing title is pretty similar, so it could be what we're looking for: " + str(current_thing_title))
                        probable_thing_title = current_thing_title
                        probable_thing_title_confidence = 85
                    elif target_space != None:
                        space_title = str(target_space) + " " + str(target_thing_title)
                        #if self.DEBUG:
                        #   print("space title = " + str(target_space) + " + " + str(target_thing_title))
                        if fuzz.ratio(space_title, current_thing_title) > 85:
                            probable_thing_title = space_title
                        
                    elif current_thing_title.startswith(target_thing_title):
                        if self.DEBUG:
                            print("partial match:" + str(len(current_thing_title) / len(target_thing_title)))
                        if len(current_thing_title) / len(target_thing_title) < 2:
                            # The strings mostly start the same, so this might be a match.
                            probable_thing_title = current_thing_title
                            probable_thing_title_confidence = 25
                    else:
                        # A title was provided, but we were not able to match it to the current things. Perhaps we can get a property-based match.
                        continue
                        
                except Exception as ex:
                    print("Error while trying to match title: " + str(ex))
github createcandle / voco / pkg / voco_adapter.py View on Github external
result.append(match_dict.copy())
                                continue

                        # Looking for a value
                        elif target_property_title == 'value' and match_dict['thing'] != None:
                            result.append(match_dict.copy())
                            continue
                            
                        # Looking for 'all' properties
                        elif target_property_title == 'all' and match_dict['thing'] != None:
                            #If all properties are desired, add all properties
                            result.append(match_dict.copy())
                            continue
                        
                        # We found a good matching property title and already found a good matching thing title. # TODO: shouldn't this be higher up?
                        elif fuzz.ratio(current_property_title, target_property_title) > 85:
                            if self.DEBUG:
                                print("FOUND A PROPERTY WITH THE MATCHING FUZZY NAME")
                            if match_dict['thing'] == None:
                                match_dict['thing'] = current_thing_title
                                result.append(match_dict.copy())
                            else:
                                result = [] # Since this is a really good match, we remove any older properties we may have found.
                                result.append(match_dict.copy())
                                return result

                            
                        # We're looking for a numbered property (e.g. moisture 5), and this property has that number in it. Here we favour sensors. # TODO: add ordinal support?
                        elif str(numerical_index) in current_property_title and target_thing_title != None:
                            result.append(match_dict.copy())
                            
                            if thing['properties'][thing_property_key]['type'] == 'boolean' and probability_of_correct_property == 0:
github jaakkopasanen / AutoEq / measurements / name_index.py View on Github external
def search_by_false_name(self, name, threshold=80):
    """Find items whose false name fuzzily matches ``name``.

    Two scores are computed per item: ``fuzz.ratio`` on the names as given
    and ``fuzz.token_set_ratio`` on their lower-cased forms. An item is
    kept when either score is strictly above ``threshold``.

    Args:
        name: Name to search by. Must be a string; ``name.lower()`` is
            called for every item.
        threshold: Score that at least one of the two ratios has to exceed.

    Returns:
        List of ``(item, ratio, token_set_ratio)`` triplets sorted by
        ``ratio``, highest first.
    """
    hits = []
    for entry in self.items:
        plain = fuzz.ratio(entry.false_name, name)
        token_set = fuzz.token_set_ratio(entry.false_name.lower(), name.lower())
        # Skip entries where neither score clears the threshold.
        if plain <= threshold and token_set <= threshold:
            continue
        hits.append((entry, plain, token_set))
    hits.sort(key=lambda hit: hit[1], reverse=True)
    return hits
github giulionf / realtweetornotbot / src / realtweetornotbot / bot / twittersearch / tweetfinder.py View on Github external
def __score_result(tweet, search_criteria):
    """Return the ``fuzz.token_sort_ratio`` of the tweet text and the searched content."""
    return fuzz.token_sort_ratio(tweet.text, search_criteria.content)