How to use the fuzzywuzzy.process.extract function in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def test_dict_like_extract(self):
    """We should be able to use a dict-like object for choices, not only a
    dict, and still get dict-like output.

    Each result is unpacked as a ``(value, confidence, key)`` triple, and
    the value must be one of the values stored in the choices mapping.
    """
    try:
        # Python 2 location of UserDict.
        from UserDict import UserDict
    except ImportError:
        from collections import UserDict
    choices = UserDict({'aa': 'bb', 'a1': None})
    search = 'aaa'
    result = process.extract(search, choices)
    # assertGreater / assertIn report the offending operands on failure,
    # which assertTrue on a pre-evaluated boolean cannot do.
    self.assertGreater(len(result), 0)
    known_values = choices.values()
    for value, _confidence, _key in result:
        self.assertIn(value, known_values)
github yeatmanlab / pyAFQ / .maintenance / update_zenodo.py View on Github external
* (git_line_summary_path is None))

    data = [' '.join(line.strip().split()[1:-1]) for line in lines if '%'
            in line]

    # load zenodo from master
    zenodo_file = Path('.zenodo.json')
    zenodo = json.loads(zenodo_file.read_text())
    zen_names = [' '.join(val['name'].split(',')[::-1]).strip()
                 for val in zenodo['creators']]
    total_names = len(zen_names) + len(MISSING_ENTRIES)

    name_matches = []
    position = 1
    for ele in data:
        matches = process.extract(ele, zen_names, scorer=fuzz.token_sort_ratio,
                                  limit=2)
        # matches is a list:
        # [('First match', % Match), ('Second match', % Match)]
        if matches[0][1] > 80:
            val = zenodo['creators'][zen_names.index(matches[0][0])]
        else:
            # skip unmatched names
            print("No entry to sort:", ele)
            continue

        if val not in name_matches:
            if val['name'] not in CREATORS_LAST:
                val['position'] = position
                position += 1
            else:
                val['position'] = total_names + \
github depthsecurity / armory / armory / included / reports / ADUserCreds.py View on Github external
def search_term(self, txt, pw_count):
    """Total the counts of all passwords that fuzzily match the given term(s).

    :param txt: A single search string, or an iterable of search strings.
    :param pw_count: Mapping whose keys are the candidate passwords and whose
        values are indexable with ``"count"``.
    :returns: The sum of ``pw_count[p]["count"]`` over every password ``p``
        scoring above 75 against a term, accumulated across all terms (a
        password matching several terms is counted once per term).
    """
    pws = pw_count.keys()
    # isinstance also covers str subclasses, which would otherwise be
    # iterated character by character below.
    if isinstance(txt, str):
        txt = [txt]
    total_matches = 0
    for t in txt:
        # limit=None asks for every candidate; only scores above 75 are kept.
        matches = [r[0] for r in process.extract(t, pws, limit=None) if r[1] > 75]
        total_matches += sum(pw_count[p]["count"] for p in matches)

    return total_matches
github Edinburgh-Genome-Foundry / Plateo / plateo / tools.py View on Github external
def did_you_mean(name, other_names, limit=5, min_score=50):
    """Suggest entries of ``other_names`` that fuzzily resemble ``name``.

    When ``name`` is a list or tuple, each element is looked up on its own
    and a dict mapping the element to its suggestion list is returned.
    Otherwise a list of candidates is returned: at most ``limit`` of them
    are requested from the matcher, and only those scoring at least
    ``min_score`` are kept.
    """
    if not isinstance(name, (list, tuple)):
        ranked = process.extract(name, list(other_names), limit=limit)
        return [candidate for candidate, score in ranked if score >= min_score]

    suggestions = {}
    for single_name in name:
        suggestions[single_name] = did_you_mean(
            single_name, other_names, limit=limit, min_score=min_score
        )
    return suggestions
github avrae / avrae / cogs5e / funcs / lookup_ml.py View on Github external
"""Fuzzy searches a list for an object
    result can be either an object or list of objects
    :param list_to_search: The list to search.
    :param value: The value to search for.
    :param key: A function defining what to search for.
    :param cutoff: The scorer cutoff value for fuzzy searching.
    :param return_key: Whether to return the key of the object that matched or the object itself.
    :returns: A two-tuple (result, strict) or None"""
    # full match, return result
    result = next((a for a in list_to_search if value.lower() == key(a).lower()), None)
    if result is None:
        partial_matches = [a for a in list_to_search if value.lower() in key(a).lower()]
        if len(partial_matches) > 1 or not partial_matches:
            names = [key(d) for d in list_to_search]
            fuzzy_map = {key(d): d for d in list_to_search}
            fuzzy_results = [r for r in process.extract(value, names, scorer=fuzz.ratio) if r[1] >= cutoff]
            fuzzy_sum = sum(r[1] for r in fuzzy_results)
            fuzzy_matches_and_confidences = [(fuzzy_map[r[0]], r[1] / fuzzy_sum) for r in fuzzy_results]
            # hardcoded to return only non-homebrew spells
            net_matches, net_confidences = get_spell_model_predictions(value, 10)

            # display the results in order of confidence
            weighted_results = []
            weighted_results.extend((match, confidence) for match, confidence in zip(net_matches, net_confidences))
            weighted_results.extend((match, confidence) for match, confidence in fuzzy_matches_and_confidences)
            weighted_results.extend((match, len(value) / len(key(match))) for match in partial_matches)
            sorted_weighted = sorted(weighted_results, key=lambda e: e[1], reverse=True)
            log.debug('\n'.join(f"{key(r[0])}: {r[1]:.2f}" for r in sorted_weighted))

            # build results list, unique
            results = []
            for r in sorted_weighted:
github xdustinface / SmartNodeMonitorBot / src / faq.py View on Github external
def parse(bot, args):
    """Build the FAQ reply for the words in ``args``.

    With no arguments the help text is used. Otherwise the joined arguments
    are fuzzy-matched against the FAQ keys: if the two best candidates tie,
    or the best score is below 60, the "unknown" reply is used; else the
    best topic's question and answer are used. The result is passed through
    ``messages.markdown`` together with ``header``.
    """
    message = help()

    if len(args) > 0:
        ask = " ".join(args)
        choices = fuzzy.extract(ask, faqs.keys(), limit=2)

        best, runner_up = choices[0], choices[1]
        top_score = best[1]
        is_tie = top_score == runner_up[1]

        if is_tie or top_score < 60:
            log.warning('Invalid fuzzy result {} - {}\n'.format(ask, choices))
            message = unknown(ask)
        else:
            entry = faqs[best[0]]
            message = "<b>" + entry.question + "<b>\n\n" + entry.answerCB(bot)

    return messages.markdown(header + message, bot.messenger)
</b></b>
github sharadbhat / VideoHub / Server / fuzzy_search.py View on Github external
def fuzzy(search_key, videos, video_titles):
    """
    - Returns the IDs of the videos whose title is among the (up to) ten
      closest fuzzy matches for ``search_key``, in match order.
    - ``videos`` is iterated for IDs and indexed by ID to get each title.
    """
    top_hits = process.extract(search_key, video_titles, limit=10)
    matched_titles = [hit[0] for hit in top_hits]
    return [
        video_id
        for title in matched_titles
        for video_id in videos
        if title == videos[video_id]
    ]
github eguilg / nl2sql / code / sqlnet / utils.py View on Github external
if type == 'text':
				for item in list(thesaurus_dic.keys()):
					if item in cur_str:
						cur_str_the = re.sub(item,thesaurus_dic[item],cur_str)
						candidates[cur_str_the] = (st, ed)
			candidates[cur_str] = (st, ed)
			cur_idx += 1
	# if tback_flag:
	# 	target_str = copy_target_str

	if list(candidates.keys()) is None or len(list(candidates.keys())) == 0:
		# print('-----testnone----',target_str, tokens,ngrams)
		return -1, -1

	target_str = str(target_str).replace('-', '')
	resultsf = process.extract(target_str, list(candidates.keys()), limit=10, processor=my_process, scorer=my_scorer)
	results = extact_sort(target_str, list(candidates.keys()), limit=10)
	if not results or not resultsf:
		return -1, -1
	dchosen, dcscore = results[0]
	fchosen, fcscore = resultsf[0]
	if fcscore > dcscore:
		cscore = fcscore
		chosen = fchosen
	else:
		cscore = dcscore
		chosen = dchosen

	if cscore !=100:
		pass
		#q = ''.join(tokens).replace('##','')
		#score = '%d'%(cscore)
github Run1e / AceBot / utils / docs_search.py View on Github external
def find_page(query):
    """Pick the documentation page name that best fits ``query``.

    Candidates are the keys of ``docs`` ranked with ``fuzz.partial_ratio``.
    The first ranked candidate whose upper-case letters spell the
    upper-cased query, or which starts with the query (case-insensitive),
    is returned; if none qualifies, the top-ranked candidate is returned.
    """
    ranked = process.extract(
        query,
        docs.keys(),
        scorer=fuzz.partial_ratio,
        limit=99999,
    )

    for candidate, score in ranked:
        # Acronym-style hit: the candidate's capitals equal the query.
        capitals = ''.join(filter(str.isupper, candidate))
        if query.upper() == capitals:
            return candidate
        # Prefix hit, ignoring case.
        if candidate.lower().startswith(query.lower()):
            return candidate

    best_candidate = ranked[0]
    return best_candidate[0]
github bergercookie / xkcd-albert-plugin / __init__.py View on Github external
if query.isTriggered:
        # be backwards compatible with v0.2
        if "disableSort" in dir(query):
            query.disableSort()

        try:
            with open(xkcd_dict, "r", encoding="utf-8") as f:
                d = json.load(f)


            if len(query.string) in [0, 1]:  # Display all items
                for k, v in d.items():
                    results.append(get_as_item(k, v))
            else:  # fuzzy search
                desc_to_item = {item[1]["description"]: item for item in d.items()}
                matched = process.extract(
                    query.string.strip(), list(desc_to_item.keys()), limit=20
                )
                for m in [elem[0] for elem in matched]:
                    # bypass a unicode issue - use .get
                    item = desc_to_item.get(m)
                    if item:
                        results.append(get_as_item(*item))

        except Exception as e:  # user to report error
            results.insert(
                0,
                v0.Item(
                    id=__prettyname__,
                    icon=iconPath,
                    text="Something went wrong! Press [ENTER] to copy error and report it",
                    actions=[