How to use the fuzzywuzzy.fuzz.ratio function in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github seatgeek / fuzzywuzzy / test_fuzzywuzzy.py View on Github external
def testRatioUnicodeString(self):
        """Check that fuzz.ratio scores "\u00C1" against "ABCD" as 0.

        The first string is a single non-ASCII character (U+00C1) written
        as an escape; the second is plain ASCII.
        """
        accented = "\u00C1"
        ascii_text = "ABCD"
        self.assertEqual(0, fuzz.ratio(accented, ascii_text))
github research-software-directory / research-software-directory / src / services / publication.py View on Github external
def match_names(p1_full_name, p2_first_name, p2_last_name):
    """Score how well person 1's full name matches person 2's known name.

    Two fuzz.ratio comparisons are made and the higher score is returned:
    person 2's "first last" name against person 1's full name, and
    person 2's last name against whatever follows the final space in
    person 1's full name (the whole string if it contains no space).
    """
    p2_full_name = p2_first_name + ' ' + p2_last_name
    whole_name_score = fuzz.ratio(p2_full_name, p1_full_name)

    # Text after the last space; equals the whole string when there is none.
    p1_last_token = p1_full_name.rpartition(" ")[2]
    last_name_score = fuzz.ratio(p2_last_name, p1_last_token)

    return max(whole_name_score, last_name_score)
github vered1986 / OKR / src / baseline_system / eval_entity_coref.py View on Github external
def fuzzy_fit(x, y):
    """
    Returns whether x and y are similar in fuzzy string matching.

    The raw strings are compared first (fuzz.ratio >= 90). If that fails,
    every whitespace-separated token made only of decimal digits is spelled
    out with num2words (hyphens replaced by spaces) and the rewritten
    strings are compared again with a lower threshold (fuzz.ratio >= 85).

    :param x: the first mention
    :param y: the second mention
    :return: whether x and y are similar in fuzzy string matching
    """
    if fuzz.ratio(x, y) >= 90:
        return True

    def spell_out_numbers(mention):
        # str.isdecimal() rather than str.isdigit(): isdigit() is also true
        # for characters such as superscript digits ("²") that int() rejects
        # with ValueError, whereas every isdecimal() string parses cleanly.
        return ' '.join(
            num2words(int(token)).replace('-', ' ') if token.isdecimal() else token
            for token in mention.split()
        )

    # Convert numbers to words, then retry with the looser threshold
    return fuzz.ratio(spell_out_numbers(x), spell_out_numbers(y)) >= 85
github deepmipt / DeepPavlov / deeppavlov / models / kbqa / entity_linking_wikidata.py View on Github external
entity_lemm.append(tok_2)
                    entity_lemm = ' '.join(entity_lemm)
                    if entity_lemm != entity:
                        candidate_entities += self.name_to_q.get(entity_lemm, [])
                            
                srtd_cand_ent = sorted(candidate_entities, key=lambda x: x[2], reverse=True)
                if len(srtd_cand_ent) > 0:
                    wiki_entities_batch.append([srtd_cand_ent[i][1] for i in range(len(srtd_cand_ent))])
                    confidences.append([1.0 for i in range(len(srtd_cand_ent))])
                else:
                    word_length = len(entity)
                    candidates = []
                    for title in self.name_to_q:
                        length_ratio = len(title) / word_length
                        if length_ratio > 0.5 and length_ratio < 1.5:
                            ratio = fuzz.ratio(title, entity)
                            if ratio > 50:
                                entity_candidates = self.name_to_q.get(title, [])
                                for cand in entity_candidates:
                                    candidates.append((cand, fuzz.ratio(entity, cand[0])))
                    
                    candidates = list(set(candidates))
                    srtd_cand_ent = sorted(candidates, key=lambda x: x[1], reverse=True)
                    
                    if len(srtd_cand_ent) > 0:
                        wiki_entities_batch.append([srtd_cand_ent[i][0][1] for i in range(len(srtd_cand_ent))])
                        confidences.append([srtd_cand_ent[i][1]*0.01 for i in range(len(srtd_cand_ent))])
                    else:
                        wiki_entities_batch.append(["None"])
                        confidences.append([0.0])
        
        if self._debug:
github joealcorn / TweetPoster / TweetPoster / utils.py View on Github external
def tweet_in_title(tweet, submission):
    """Tell whether a tweet's text matches a submission's title.

    True when fuzz.ratio of ``tweet.text`` and ``submission.title`` is at
    least 85, or when the lower-cased tweet text occurs inside the
    lower-cased title; False otherwise.
    """
    if fuzz.ratio(tweet.text, submission.title) >= 85:
        return True
    # Fall back to a case-insensitive substring check.
    return tweet.text.lower() in submission.title.lower()
github craws / OpenAtlas / openatlas / models / api_helpers / api_sql.py View on Github external
def get_similar_named(form: FlaskForm) -> Dict[int, Any]:
        """Group entities whose names are fuzzily similar to each other.

        The entity list is fetched through ``Query.get_by_menu_item`` when
        ``form.classes.data`` is 'source', 'event' or 'actor', and through
        ``Query.get_by_system_type`` otherwise. Each entity not already
        placed in a group is compared by name against every other entity;
        those scoring at least ``form.ratio.data`` with fuzz.ratio are
        collected under it.

        Returns a dict keyed by entity id whose values have the form
        ``{'entity': <entity>, 'entities': [<similar entities>]}``; entries
        with no similar entities are left out.
        """
        class_ = form.classes.data
        entities = (
            Query.get_by_menu_item(class_)
            if class_ in ['source', 'event', 'actor']
            else Query.get_by_system_type(class_)
        )

        groups: Dict[int, Any] = {}
        grouped_ids: Set[int] = set()
        for sample in entities:
            # Anything already matched into a group does not start its own.
            if sample.id in grouped_ids:
                continue
            matches = []
            groups[sample.id] = {'entity': sample, 'entities': matches}
            for other in entities:
                if sample.id == other.id:
                    continue
                if fuzz.ratio(sample.name, other.name) < form.ratio.data:
                    continue
                grouped_ids.update((sample.id, other.id))
                matches.append(other)

        # Keep only the groups that actually found a similar entity.
        return {
            entity_id: group
            for entity_id, group in groups.items()
            if group['entities']
        }
github castorini / BuboQA / scripts / augment_process_dataset.py View on Github external
        n_gram_candidate = sorted(n_gram_candidate, key=lambda x: fuzz.ratio(x[0], v), reverse=True)
        top = n_gram_candidate[0]
github Run1e / AceBot / cogs / games.py View on Github external
async def convert(self, ctx, argument):
		"""Resolve ``argument`` to a trivia category id by fuzzy name match.

		The single best match among the keys of ``ctx.cog.trivia_categories``
		is taken from ``process.extract`` scored with ``fuzz.ratio``. A score
		below 76 raises ``ValueError``. When the matched entry is a list, one
		element is picked with ``choice``; otherwise the entry is returned
		as is.
		"""
		best_match = process.extract(
			query=argument,
			choices=ctx.cog.trivia_categories.keys(),
			scorer=fuzz.ratio,
			limit=1,
		)[0]
		category_name, match_score = best_match

		if match_score < 76:
			# will never be shown so no need to prettify it
			raise ValueError()

		category_id = ctx.cog.trivia_categories[category_name]
		if isinstance(category_id, list):
			return choice(category_id)
		return category_id
github Rohithgilla12 / TwitterNewsAnalyserBot / NewsComparer / scripts / main.py View on Github external
def compare(title1_list,link1_list,title2_list,link2_list):
    compare_dict=[]
    for title1 in title1_list:
        maxVal=10
        t1=title1
        l1=link1_list[title1_list.index(title1)]
        t2=""
        l2=""
        for title2 in title2_list:
            val=fuzz.ratio(title1, title2)                
            if maxVal