Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
# Draw a list of random strings
strings = data.draw(
st.lists(st.text(min_size=10, max_size=100),
min_size=1, max_size=50))
# Draw a random integer for the index in that list
choiceidx = data.draw(st.integers(min_value=0, max_value=(len(strings) - 1)))
# Extract our choice from the list
choice = strings[choiceidx]
# Check process doesn't make our choice the empty string
assume(processor(choice) != '')
# Extract all perfect matches
result = process.extractBests(choice,
strings,
scorer=scorer,
processor=processor,
score_cutoff=100,
limit=None)
# Check we get a result
assert result != []
# Check THE ONLY result(s) we get are a perfect match for the (processed) original data
pchoice = processor(choice)
for r in result:
assert pchoice == processor(r[0])
"""
# Draw a list of random strings
strings = data.draw(
st.lists(st.text(min_size=10, max_size=100),
min_size=1, max_size=50))
# Draw a random integer for the index in that list
choiceidx = data.draw(st.integers(min_value=0, max_value=(len(strings) - 1)))
# Extract our choice from the list
choice = strings[choiceidx]
# Check process doesn't make our choice the empty string
assume(processor(choice) != '')
# Extract all perfect matches
result = process.extractBests(choice,
strings,
scorer=scorer,
processor=processor,
score_cutoff=100,
limit=None)
# Check we get a result
assert result != []
# Check the original is in the list
assert (choice, 100) in result
def search_docsets(self, widget):
    """Recompute the search results from the widget's text, then refilter.

    The widget's text is stripped of surrounding whitespace. A non-empty
    query is passed to ``search`` together with the application's choices,
    each choice being matched through its ``name`` attribute. An empty
    query clears the stored results instead. In both cases the sidebar
    filter is asked to refilter afterwards.
    """
    # TODO: We should move this off of the main thread for performance
    text = widget.get_text().strip()
    self.__results = (
        search(text, self.__application.choices, processor=lambda x: x.name)
        if text
        else None
    )
    self.__sidebar_filter.refilter()
def get_best_fuzzy(name, choices, cutoff=0, return_score=False):
items = process.extractBests(name, choices, score_cutoff=cutoff)
if not items:
return None
elif len(items) == 1:
best = items[0]
else:
scores = [s[1] for s in items]
# finds items with the same score
morethanone = sum(np.max(scores) == scores) > 1
if morethanone:
# tries to find an exact string match
exact = []
for s in items:
itemname = s[0].name if isinstance(s[0], QueryParameter) else s[0]
if itemname.lower() == name.lower():
exact.append(s)
Kwargs:
limit (int): The maximum number of match candidates to retrieve
from fuzzywuzzy. The length of the returned array may be longer, if
the set of addresses has identical addresses that are good matches
for the expected address (i.e. if two geocoders resolve to the same
string address).
Returns:
list. Return value is a list of tuples, where each tuple contains a
geopy Location, and a matching score based on an extension of the
Levenshtein distance between the expectation and the Location's
address (a higher score is a better match). The algorithm is
implemented by SeatGeek's fuzzywuzzy, and you can read more here:
http://chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/
"""
extractions = fuzzyprocess.extractBests(
expectation, [str(a) for a in self.addresses],
limit=limit)
result = []
for extraction in extractions:
result.extend([(x, extraction[1]) for x in self.addresses
if str(x) == extraction[0]])
return result
raise ValueError('invalid value. Must be a string.')
if len(value) < 3:
raise ValueError('your fuzzy search value must be at least three characters long.')
if len(choices) == 0:
raise ValueError('choices cannot be an empty list.')
# If the value contains _ivar or _mask this is probably an incorrect use
# of the fuzzy feature. We raise an error.
if '_ivar' in value:
raise ValueError('_ivar not allowd in search value.')
elif '_mask' in value:
raise ValueError('_mask not allowd in search value.')
bests = fuzz_proc.extractBests(value, choices, scorer=scorer, score_cutoff=min_score)
if len(bests) == 0:
best = None
elif len(bests) == 1:
best = bests[0]
else:
if bests[0][1] == bests[1][1]:
best = None
else:
best = bests[0]
if best is None:
raise ValueError('cannot find a good match for {0!r}. '
'Your input value is too ambiguous.'.format(value))
return best if return_score else best[0]
def _handle_not_found(self, query: str) -> None:
    """
    Raise `HelpQueryNotFound` for a query that is not a valid command or cog.

    The names of all the bot's commands and cogs are fuzzy-matched against
    the query (`fuzz.ratio` scorer, score cutoff of 90). The surviving
    matches are attached to the exception as a mapping built from the
    (name, score) pairs. This method always raises.
    """
    bot = self._bot
    # Candidate names: every command name followed by every cog name.
    candidates = [*bot.all_commands, *bot.cogs]
    close_matches = dict(
        process.extractBests(query, candidates, scorer=fuzz.ratio, score_cutoff=90)
    )
    raise HelpQueryNotFound(f'Query "{query}" not found.', close_matches)
if f is not None:
match_string = f(heard_lower, self.language)
match_func = f.__name__
else:
match_string = heard_lower
match_func = 'heard'
match_strings.append(match_string)
log.info(' %s -> "%s"', match_func, match_string.encode("utf-8"))
except:
continue
fuzzy_results = []
for ms in set(match_strings):
log.info(' Trying with "%s"', ms.encode("utf-8"))
matches = process.extractBests(ms, [d[lookingFor] for d in results], limit=limit, scorer=fuzz.UQRatio, score_cutoff=75)
if matches:
log.info(' Best score %d%%', matches[0][1])
fuzzy_results += matches
# Got a match?
if fuzzy_results:
winners = sorted(fuzzy_results, key=lambda x: x[1], reverse=True)
log.info('BEST MATCH: "%s" @ %d%%', winners[0][0].encode("utf-8"), winners[0][1])
for winner in winners:
located.append((item for item in results if item[lookingFor] == winner[0]).next())
else:
log.info('BEST MATCH: "%s"', located[0][lookingFor].encode("utf-8"))
return located[:limit]
def get_matches(word_list: list, word: str, scorer='fp_ratio', score_cutoff: int = 80, limit: int = 10):
    """Fuzzy-match ``word`` against ``word_list`` and return the best tier found.

    ``scorer`` is the name of the scoring function to use: ``'ratio'`` or
    ``'fp_ratio'``. Any other name raises ``KeyError``. Candidates come from
    ``process.extractBests`` using the ``pre`` processor, ``score_cutoff``
    and ``limit``.

    Returns a list of (MATCH, SCORE) tuples, narrowed to the strictest
    non-empty tier:
      * matches scoring 95 or more, if any;
      * otherwise matches scoring 90 or more, if any;
      * otherwise every candidate returned by fuzzywuzzy.
    """
    score_fn = {
        'ratio': fuzz.ratio,
        'fp_ratio': fp_ratio,
    }[scorer]
    candidates = process.extractBests(
        word,
        word_list,
        processor=pre,
        scorer=score_fn,
        score_cutoff=score_cutoff,
        limit=limit,
    )
    # Try the strictest threshold first and fall through to looser ones.
    for threshold in (95, 90):
        tier = [match for match in candidates if match[1] >= threshold]
        if tier:
            return tier
    return candidates