How to use the fuzzywuzzy.process function in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github HashCode55 / Sniper / sniper / sniper.py View on Github external
USAGE 

    - sniper find 
    """                                       
    # give preference to the name first 
    data = open_store()
    query = query.strip().lower()
    names = list(data.keys())
    if len(names) == 0: 
        raise SniperError('No snippets saved.')        
    # merge key name and description just for searching 
    # putting it in a dict for extraction
    content = {(key + ' ' + data[key]['DESC']):key for key in data.keys()}
    # get the result 
    result = process.extract(query, content.keys())    
    # extract
    result = [(content[data], score) for data, score in result]        
    # now simply print the first 5 results 
    click.echo("These are the top five matching results: \n")
    name_h, desc_h = 'NAME', 'DESC'
    click.echo(name_h.ljust(LJUST) + desc_h.ljust(LJUST))        
    # use set to remove duplicate entries     
    result = list(set(result))     
    result.sort(key = lambda x: x[1], reverse=True)
    result = result[:5]
    # print the result     
    for name, _ in result:                        
        desc = data[name]['DESC']        
        name = name.ljust(LJUST)
        if len(desc) > 25:
            desc = desc[:25] + '...'
github robinandeer / cosmid / cosmid / core.py View on Github external
>>> resource = registry.get("ccds")
      >>> registry.matchOne(104, resource.versions())
      'Hs104'

      >>> registry.matchOne("ensembl", registry.ls())
      'ensembl_assembly'

    :param object target: Any Python object to match with
    :param list options: A list of possible options to match against
    :param int threshold: A lower threshold for accepting a best match
    :returns: The object with the best match (unless score is below threshold)
    :rtype: Python object
    """
    # Match against the options and extract the top match only
    result, score = process.extractOne(target, map(str, options))

    # Arbitrary lower limit for returning a *matching* result
    if score >= threshold:
      return result
    else:
      return None
github alpha-beta-soup / errorgeopy / errorgeopy / address.py View on Github external
for detail on the deduplication algorithm implementation. This
            method does not modify the :code:`Address.addresses` property.

        Kwargs:
            threshold (int): the numerical value (0,100) point at which you
            expect to find duplicates. Defaults to 95 out of 100, which is
            higher than the fuzzywuzzy default (70); this higher threshold is
            used by default since addresses are more sensitive to small changes
            (e.g. "250 Main Street" and "150 Main Street" have a small edit
            distance when considered as strings, but may have a reasonably large
            physical distance when considered as physical addresses).
        Returns:
            A list of :code:`geopy.location.Location` objects (essentially a
            filtered list of the original set).
        """
        return fuzzyprocess.dedupe([str(a) for a in self.addresses], threshold)
github openelections / openelections-core / openelex / us / nh / load.py View on Github external
def _find_precinct_details(self, precinct, office, district):
        """Return the place record that best fuzzy-matches ``precinct``.

        Candidate records come from ``self.datasource()._places()`` and are
        kept only when their ``office`` entry equals ``str(district)``. The
        ``'place'`` values of those candidates are passed to
        ``process.extractOne`` and the first candidate whose ``'place'``
        equals the top-scoring choice is returned.
        """
        district_label = str(district)
        candidates = [
            record for record in self.datasource()._places()
            if record[office] == district_label
        ]
        candidate_names = [record['place'] for record in candidates]
        best = process.extractOne(precinct, candidate_names)
        best_name = best[0]
        # Several records may share a place name; keep the first one.
        same_name = [
            record for record in candidates if record['place'] == best_name
        ]
        return same_name[0]
github JeffersonLab / PyPWA / PyPWA / libs / configuration.py View on Github external
def _correct_keys(parsed: _OPTIONS, template: _TEMPLATE) -> _OPTIONS:
    if _FUZZING:
        corrected = dict()
        correct_keys = list(template.keys())

        for key in parsed.keys():
            # Handle situations where there are no provided keys
            try:
                fuzz = fuzzywuzzy.process.extractOne(key, correct_keys)
            except RuntimeError:
                fuzz = (0, 0)

            if fuzz[1] >= _FUZZY_STRING_CONFIDENCE_LEVEL:
                found = fuzz[0]
            else:
                _LOGGER.info(f"Failed to find: {key}. Fuzz results: {fuzz!r}")
                found = key

            if found in correct_keys and isinstance(template[found], dict):
                corrected[found] = _correct_keys(parsed[key], template[found])
            else:
                corrected[found] = parsed[key]

        return corrected
    else:
github sheagcraig / yaypis / examples / beautiful_soup / nes2.py View on Github external
def main():
    """Report which Wikipedia NES games have no fuzzy-matched SDA record.

    Builds the two title sets, pairs every SDA title with its best
    ``process.extractOne`` match among the Wikipedia titles, treats scores
    of 90 or more as matches, and prints summary counts.
    """
    sda_games = set(get_sda_games())
    print('Found {} games on SDA'.format(len(sda_games)))
    all_games = set(get_all_nes_games())
    print('Found {} games on Wikipedia'.format(len(all_games)))

    # Each entry is the SDA title followed by the extractOne result tuple.
    fuzzy_sda_games = []
    for game in sda_games:
        fuzzy_sda_games.append((game, *process.extractOne(game, all_games)))

    # Index 1 is the matched Wikipedia title, index 2 is the match score.
    matched_sda_games = set()
    for entry in fuzzy_sda_games:
        if entry[2] >= 90:
            matched_sda_games.add(entry[1])
    # Left as a lazy generator; nothing in this excerpt consumes it.
    unmatched_sda_games = (g for g in fuzzy_sda_games if g[2] < 90)

    # Set up generator
    color = alternating_color()

    # Output!
    no_records = all_games.difference(matched_sda_games)
    colored = []
    for name in no_records:
        paint = next(color)
        colored.append(paint(name))
    summary = 'There are {} games with no records on SDA! Go grind!'
    print(summary.format(len(no_records)))
    print('Games that could be matched:')
github datreant / datreant / src / datreant / metadata.py View on Github external
if isinstance(tag, str):
            tags = [tag]
        else:
            tags = tag

        if scope == 'all':
            choices = self.all
        elif scope == 'any':
            choices = self.any
        else:
            raise ValueError("Scope can only be 'any' or 'all'")

        matches = []

        for tag in tags:
            matches += [i[0] for i in process.extract(tag, choices, limit=None)
                        if i[1] > threshold]

        return tuple(matches)
github eegsynth / eegsynth / module / launchpad / launchpad.py View on Github external
# on windows the input and output are different, on unix they are the same
    # use "input/output" when specified, or otherwise use "device" for both
    try:
        mididevice_input = patch.getstring('midi', 'input')
        mididevice_input = EEGsynth.trimquotes(mididevice_input)
    except:
        mididevice_input = patch.getstring('midi', 'device') # fallback
        mididevice_input = EEGsynth.trimquotes(mididevice_input)
    try:
        mididevice_output = patch.getstring('midi', 'output')
        mididevice_output = EEGsynth.trimquotes(mididevice_output)
    except:
        mididevice_output = patch.getstring('midi', 'device') # fallback
        mididevice_output = EEGsynth.trimquotes(mididevice_output)

    mididevice_input  = process.extractOne(mididevice_input, mido.get_input_names())[0] # select the closest match
    mididevice_output = process.extractOne(mididevice_output, mido.get_output_names())[0] # select the closest match

    try:
        inputport = mido.open_input(mididevice_input)
        monitor.success('Connected to MIDI input')
    except:
        raise RuntimeError("cannot connect to MIDI input")

    try:
        outputport = mido.open_output(mididevice_output)
        monitor.success('Connected to MIDI output')
    except:
        raise RuntimeError("cannot connect to MIDI output")

    # channel 1-16 in the ini file should be mapped to 0-15
    if not midichannel is None:
github Run1e / AceBot / cogs / ahk / ahk.py View on Github external
if entry:
			sql += 'INNER JOIN docs_entry ON docs_name.docs_id = docs_entry.id '

		if syntax:
			sql += 'LEFT OUTER JOIN docs_syntax ON docs_name.docs_id = docs_syntax.docs_id '

		sql += 'ORDER BY word_similarity(name, $1) DESC, LOWER(name)=$1 DESC LIMIT $2'

		# get the closest matches (at least 8) according to trigram matching
		matches = await self.db.fetch(sql, query, max(count, 8))

		if not matches:
			return results

		# further fuzzy search it using fuzzywuzzy ratio matching
		fuzzed = process.extract(
			query=query,
			choices=[tup.get('name') for tup in matches],
			scorer=fuzz.ratio,
			limit=count
		)

		if not fuzzed:
			return results

		for res in fuzzed:
			for match in matches:
				if res[0] == match.get('name') and match.get('id') not in already_added:
					results.append(match)
					already_added.add(match.get('id'))

		return results