Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this excerpt has lost its indentation and is cut off inside the
# final loop, so the nesting of the statements below and the function's final
# return value are not visible here -- confirm against the full source file.
#
# Looks up `text` in column `key` of the table named `file_name`: an exact
# lookup through a per-key index is tried first, then each row of
# `source_list` is scored against `text` with `fuzzy_func`.
def fuzzy_find_text(self, text, file_name, key, source_list=None, fuzzy_func=fuzz.partial_ratio):
# Leading/trailing whitespace is removed before any comparison.
text = text.strip()
# Without an explicit source_list, the rows of self.rr_english[file_name] are searched.
if source_list is None:
source_list = self.rr_english[file_name]
# Try faster indexed search first and see if we get any perfect results
# The index for `key` is built on first use.
if key not in self.rr_english[file_name].index:
self.rr_english[file_name].build_index(key)
# Exact lookup of the stripped text in the index for `key`.
results = self.rr_english[file_name].index[key][text]
# Exactly one hit: return the `key` value of the row with the same rowid in
# self.rr[file_name] (note: self.rr, not self.rr_english -- presumably the
# translated counterpart of the English table; confirm).
if len(results) == 1:
return self.rr[file_name][results[0].rowid][key]
# Try to find translation for the name using fuzzy search
results = []
for row in source_list:
# Score this row's `key` value against the search text; how the score is
# used is not visible in this excerpt.
ratio = fuzzy_func(row[key], text)
def package_to_merge(decrypted_package, decrypted_packages, known_packages):
    """Collect the packages that should be merged with *decrypted_package*.

    A package from *decrypted_packages* is selected when its ``uuid`` is not
    in *known_packages* and either

    * its name contains the generic placeholder "Verschiedene Dateien" or
      "Various files", or
    * ``fuzz.partial_ratio`` between the reference name and its name is
      greater than 55.

    Args:
        decrypted_package: Mapping with at least a ``'name'`` key; the
            reference package.
        decrypted_packages: Iterable of mappings with ``'uuid'``, ``'name'``
            and ``'linkids'`` keys.
        known_packages: Container of uuids to skip.

    Returns:
        A one-element list ``[[titles, uuids, linkids]]`` where the three
        inner lists hold the names, uuids and (flattened) link ids of all
        selected packages, in input order.
    """
    title = decrypted_package['name']
    mergable_titles = []
    mergable_uuids = []
    mergable_linkids = []
    for dp in decrypted_packages:
        if dp['uuid'] in known_packages:
            continue
        dp_title = dp['name']
        # The substring test is cheap, so run it first and only fall back to
        # the fuzzy comparison when the name is not a generic placeholder.
        is_generic = "Verschiedene Dateien" in dp_title or "Various files" in dp_title
        if is_generic or fuzz.partial_ratio(title, dp_title) > 55:
            mergable_titles.append(dp_title)
            mergable_uuids.append(dp['uuid'])
            mergable_linkids.extend(dp['linkids'])
    # The result is wrapped in an outer list to keep the original return
    # shape; sorting a single-element list would be a no-op, so none is done.
    return [[mergable_titles, mergable_uuids, mergable_linkids]]
def get_combined_fuzz_score(self, a, b, mode='geom_mean'):
    """Return one similarity score for *a* and *b* from two fuzzy ratios.

    Both inputs are passed through ``clean_name``. The plain ``fuzz.ratio``
    and the ``fuzz.partial_ratio`` of the cleaned values are each multiplied
    by their entry in ``self.weight`` (keys ``'simple'`` and ``'partial'``),
    converted to ``float`` and handed to ``self.combine_scores`` together
    with *mode*.
    """
    cleaned_a, cleaned_b = (clean_name(value) for value in (a, b))

    weighted_simple = fuzz.ratio(cleaned_a, cleaned_b) * self.weight['simple']
    simple_score = float(weighted_simple)

    weighted_partial = fuzz.partial_ratio(cleaned_a, cleaned_b) * self.weight['partial']
    partial_score = float(weighted_partial)

    return self.combine_scores(simple_score, partial_score, mode=mode)
# NOTE(review): indentation was stripped from this excerpt. `self` is not a
# parameter, so this is presumably a helper nested inside a method and closing
# over that method's `self` -- confirm. Whether `return True` belongs to the
# `if` body or to the function body cannot be determined from here.
#
# Replaces the postcode on `record` with the one from the AddressBase entry
# for the record's UPRN when the two address strings match perfectly.
def correct_postcode_from_uprn(record, addressbase_data):
# Fetch the AddressBase entry keyed by the record's UPRN.
addressbase_record = addressbase_data[record["uprn"]]
# Compare the two addresses lower-cased and with commas removed.
match_quality = fuzz.partial_ratio(
record["address"].lower().replace(",", ""),
addressbase_record["address"].lower().replace(",", ""),
)
# Only a score of 100 (or more) is accepted as a match.
if match_quality >= 100:
# Overwrite the record's postcode with the AddressBase postcode.
record["postcode"] = addressbase_record["postcode"]
# NOTE(review): record["postcode"] has already been overwritten by the
# statement above, so the first two values passed in `variable` are the same
# (the new postcode); the old postcode no longer reaches the log message.
self.logger.log_message(
logging.INFO,
"Replacing %s with %s for record:\n%s\n",
variable=(
record["postcode"],
addressbase_record["postcode"],
record,
),
)
return True
def rank(self, target, searches, limit=10):
    """Return the keys of *searches* that best match *target*, best first.

    Up to *limit* candidates are taken from ``process.extract`` using
    ``fuzz.partial_ratio`` as scorer. Candidates with a score of 0 are
    dropped; every remaining score is multiplied by
    ``math.log(searches[name] + 1)`` and the names are returned in
    descending order of that weighted score. When nothing is left,
    ``[target]`` is returned instead.
    """
    candidates = process.extract(
        target, searches.keys(), limit=limit, scorer=fuzz.partial_ratio)

    weighted = []
    for candidate in candidates:
        name, score = candidate[0], candidate[1]
        if score > 0:
            weighted.append((name, score * math.log(searches[name] + 1)))

    if not weighted:
        return [target]

    # A stable descending sort keeps tied entries in their original order.
    weighted.sort(key=lambda pair: pair[1], reverse=True)
    return [pair[0] for pair in weighted]
if record["postcode"] != addressbase_record["postcode"]:
# The UPRN attached to the input record is present
# in the data we fetched from AddressBase, but the postcode
# on the input record doesn't match the postcode on the
# record from AddressBase
if not fuzzy_match:
self.logger.log_message(
logging.INFO,
"Removing UPRN due to postcode mismatch.\nInput Record:\n%s\nAddressbase record:\n%s",
variable=(record, addressbase_data[record["uprn"]]),
)
record["uprn"] = ""
continue
match_quality = fuzz.partial_ratio(
record["address"].lower().replace(",", ""),
addressbase_record["address"].lower().replace(",", ""),
)
accept_suggestion = record.get(
"accept_suggestion", (match_quality >= match_threshold)
)
if accept_suggestion:
# If [input record address] and [addressbase record address]
# are match_threshold% the same, assume the postcode on
# [input record] is wrong and fix [input record]
# with the postcode from addressbase
self.logger.log_message(
logging.INFO,
"Correcting postcode based on UPRN and fuzzy match.\nInput Record:\n%s\nAddressbase record:\n%s\nMatch quality: %s\n",
variable=(record, addressbase_record, round(match_quality)),