How to use the rapidfuzz.fuzz.partial_ratio function in RapidFuzz

To help you get started, we’ve selected a few RapidFuzz examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github OmegaK2 / PyPoE / PyPoE / cli / exporter / wiki / admin / unique.py View on Github external
    def fuzzy_find_text(self, text, file_name, key, source_list=None, fuzzy_func=fuzz.partial_ratio):
        """Look up ``text`` in column ``key`` of ``file_name``, exactly first, then fuzzily.

        The text is stripped of surrounding whitespace. An indexed lookup on
        ``self.rr_english[file_name]`` is attempted first; if it yields exactly
        one row, the value of ``key`` for the row with the same ``rowid`` in
        ``self.rr[file_name]`` is returned. Otherwise every row of
        ``source_list`` is scored with ``fuzzy_func(row[key], text)``.

        :param text: text to search for
        :param file_name: key into ``self.rr_english`` / ``self.rr``
        :param key: column name that is indexed and compared
        :param source_list: rows to scan in the fuzzy pass; defaults to
            ``self.rr_english[file_name]``. The indexed lookup always uses
            ``self.rr_english[file_name]`` regardless of this argument.
        :param fuzzy_func: scorer called as ``fuzzy_func(row[key], text)``

        NOTE(review): this excerpt ends inside the fuzzy-search loop; how the
        ratios are collected and what the fuzzy path returns is not visible
        here.
        """
        text = text.strip()

        if source_list is None:
            source_list = self.rr_english[file_name]

        # Try faster indexed search first and see if we get any perfect results
        # The index for this column is built on demand the first time it is needed.
        if key not in self.rr_english[file_name].index:
            self.rr_english[file_name].build_index(key)
        results = self.rr_english[file_name].index[key][text]
        # Only a single hit is accepted; zero or several hits fall through to
        # the fuzzy search below.
        if len(results) == 1:
            return self.rr[file_name][results[0].rowid][key]

        # Try to find translation for the name using fuzzy search
        results = []
        for row in source_list:
            ratio = fuzzy_func(row[key], text)
github rix1337 / RSScrawler / rsscrawler / myjd.py View on Github external
def package_to_merge(decrypted_package, decrypted_packages, known_packages):
    """Collect the packages that should be merged with ``decrypted_package``.

    Every entry of ``decrypted_packages`` whose ``'uuid'`` is not in
    ``known_packages`` is selected when either its ``'name'`` scores above 55
    against the reference name with ``fuzz.partial_ratio``, or its name
    contains "Verschiedene Dateien" / "Various files".

    Returns a one-element list ``[[titles, uuids, linkids]]`` holding the
    names, uuids and flattened ``'linkids'`` of the selected packages, in
    input order.
    """
    reference_title = decrypted_package['name']
    titles, uuids, linkids = [], [], []

    for candidate in decrypted_packages:
        if candidate['uuid'] in known_packages:
            continue

        candidate_title = candidate['name']
        similarity = fuzz.partial_ratio(reference_title, candidate_title)
        # Short-circuit keeps the substring checks as a fallback that only
        # runs when the fuzzy score is not high enough.
        if (
            similarity > 55
            or "Verschiedene Dateien" in candidate['name']
            or "Various files" in candidate['name']
        ):
            titles.append(candidate_title)
            uuids.append(candidate['uuid'])
            linkids.extend(candidate['linkids'])

    merged = [[titles, uuids, linkids]]
    merged.sort()
    return merged
github cutright / DVH-Analytics / dvha / tools / name_prediction.py View on Github external
def get_combined_fuzz_score(self, a, b, mode='geom_mean'):
        """Return a single similarity score for ``a`` and ``b``.

        Both inputs are normalised with ``clean_name``. The full-string
        ``fuzz.ratio`` and the substring ``fuzz.partial_ratio`` are each
        multiplied by their entry in ``self.weight`` ('simple' and 'partial'),
        converted to ``float`` and handed to ``self.combine_scores`` together
        with ``mode``.
        """
        cleaned_a = clean_name(a)
        cleaned_b = clean_name(b)

        weighted_simple = fuzz.ratio(cleaned_a, cleaned_b) * self.weight['simple']
        weighted_partial = (
            fuzz.partial_ratio(cleaned_a, cleaned_b) * self.weight['partial']
        )

        return self.combine_scores(
            float(weighted_simple), float(weighted_partial), mode=mode
        )
github DemocracyClub / UK-Polling-Stations / polling_stations / apps / data_collection / data_types.py View on Github external
def correct_postcode_from_uprn(record, addressbase_data):
            """Replace ``record["postcode"]`` with the AddressBase postcode on a perfect address match.

            The AddressBase entry is looked up by ``record["uprn"]``. Both
            addresses are lower-cased and stripped of commas, then compared
            with ``fuzz.partial_ratio``. When the score is at least 100 the
            record's postcode is overwritten in place, the replacement is
            logged at INFO level and ``True`` is returned. In the visible code
            any other outcome falls through without an explicit return.

            Raises ``KeyError`` if ``record["uprn"]`` is not present in
            ``addressbase_data``.

            NOTE(review): ``self`` is used below but is not a parameter of this
            excerpt; presumably it comes from an enclosing scope - confirm.
            """
            addressbase_record = addressbase_data[record["uprn"]]
            match_quality = fuzz.partial_ratio(
                record["address"].lower().replace(",", ""),
                addressbase_record["address"].lower().replace(",", ""),
            )
            if match_quality >= 100:
                # Remember the outgoing postcode before overwriting it;
                # otherwise the log line below would print the new postcode
                # for both the "old" and the "new" placeholder.
                previous_postcode = record["postcode"]
                record["postcode"] = addressbase_record["postcode"]
                self.logger.log_message(
                    logging.INFO,
                    "Replacing %s with %s for record:\n%s\n",
                    variable=(
                        previous_postcode,
                        addressbase_record["postcode"],
                        record,
                    ),
                )
                return True
github blaulan / alfred-beancount / beancount.py View on Github external
def rank(self, target, searches, limit=10):
        """Return the keys of ``searches`` that best match ``target``.

        Up to ``limit`` candidates are taken from ``process.extract`` using
        ``fuzz.partial_ratio`` as the scorer. Candidates with a score of zero
        or below are dropped; the rest are weighted by
        ``log(searches[name] + 1)`` and returned best-first. If nothing
        survives, ``[target]`` is returned.
        """
        candidates = process.extract(
            target, searches.keys(), limit=limit, scorer=fuzz.partial_ratio)

        weighted = []
        for candidate in candidates:
            if candidate[1] > 0:
                name = candidate[0]
                weighted.append(
                    (name, candidate[1] * math.log(searches[candidate[0]] + 1))
                )

        if not weighted:
            return [target]

        # Negated key (rather than reverse=True) keeps the original ordering
        # semantics, including how ties are resolved.
        weighted.sort(key=lambda pair: -pair[1])
        return [pair[0] for pair in weighted]
github DemocracyClub / UK-Polling-Stations / polling_stations / apps / data_collection / data_types.py View on Github external
if record["postcode"] != addressbase_record["postcode"]:
                # The UPRN attached to the input record is present
                # in the data we fetched from AddressBase, but the postcode
                # on the input record doesn't match the postcode on the
                # record from AddressBase

                if not fuzzy_match:
                    self.logger.log_message(
                        logging.INFO,
                        "Removing UPRN due to postcode mismatch.\nInput Record:\n%s\nAddressbase record:\n%s",
                        variable=(record, addressbase_data[record["uprn"]]),
                    )
                    record["uprn"] = ""
                    continue

                match_quality = fuzz.partial_ratio(
                    record["address"].lower().replace(",", ""),
                    addressbase_record["address"].lower().replace(",", ""),
                )

                accept_suggestion = record.get(
                    "accept_suggestion", (match_quality >= match_threshold)
                )
                if accept_suggestion:
                    # If [input record address] and [addressbase record address]
                    # are match_threshold% the same, assume the postcode on
                    # [input record] is wrong and fix [input record]
                    # with the postcode from addressbase
                    self.logger.log_message(
                        logging.INFO,
                        "Correcting postcode based on UPRN and fuzzy match.\nInput Record:\n%s\nAddressbase record:\n%s\nMatch quality: %s\n",
                        variable=(record, addressbase_record, round(match_quality)),