How to use the jellyfish.metaphone function in jellyfish

To help you get started, we’ve selected a few jellyfish examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github unitedstates / python-us / build.py View on Github external
def pickle_data():
    """Build ``us/states.pkl`` from the ``states`` table of ``data.db``.

    Rows are read ordered by ``name``. Each row gains a ``name_metaphone``
    entry computed from its ``name``, has its 0/1 flag columns replaced by
    booleans, and has its comma-separated ``time_zones`` value split into a
    list. The resulting list of rows is pickled to ``<PWD>/us/states.pkl``.
    """

    dbpath = os.path.abspath(os.path.join(PWD, 'data.db'))

    states = []

    conn = sqlite3.connect(dbpath)
    try:
        # Rows must support item assignment below; presumably `dict_factory`
        # yields one plain dict per row -- confirm against its definition.
        conn.row_factory = dict_factory

        c = conn.cursor()
        c.execute("""SELECT * FROM states ORDER BY name""")

        for row in c:
            row['name_metaphone'] = jellyfish.metaphone(row['name'])
            # Flag columns are compared against 1 so the pickle holds real
            # booleans rather than the stored integers.
            for flag in ('is_territory', 'is_obsolete',
                         'is_contiguous', 'is_continental'):
                row[flag] = row[flag] == 1
            row['time_zones'] = row['time_zones'].split(',')
            states.append(row)
    finally:
        # Always release the database handle, even if reading a row fails.
        conn.close()

    pkl_path = os.path.abspath(os.path.join(PWD, 'us', 'states.pkl'))

    with open(pkl_path, 'wb') as pkl_file:
        # Use `protocol=2` to ensure package compatibility with Python 2,
        # even if the `.pkl` file is built under Python 3
        pickle.dump(states, pkl_file, protocol=2)
github unitedstates / python-us / us / states.py View on Github external
exact, case-sensitive comparison against the specified field.

        This method caches non-None results, but the cache can be bypassed
        with the `use_cache=False` argument.
    """

    matched_state = None

    if field is None:
        if FIPS_RE.match(val):
            field = "fips"
        elif ABBR_RE.match(val):
            val = val.upper()
            field = "abbr"
        else:
            val = jellyfish.metaphone(val)
            field = "name_metaphone"

    # see if result is in cache
    cache_key = f"{field}:{val}"
    if use_cache and cache_key in _lookup_cache:
        matched_state = _lookup_cache[cache_key]

    for state in STATES_AND_TERRITORIES:
        if val == getattr(state, field):
            matched_state = state
            if use_cache:
                _lookup_cache[cache_key] = state

    return matched_state
github J535D165 / recordlinkage / recordlinkage / preprocessing / encoding.py View on Github external
import numpy as np

import pandas


# Registry of the supported phonetic encoders. Each entry carries a display
# name, the jellyfish function that implements it, and the argument spellings
# under which it is listed.
_phonetic_algorithms = [
    dict(
        name='Soundex',
        callback=jellyfish.soundex,
        argument_names=['soundex'],
    ),
    dict(
        name='NYSIIS',
        callback=jellyfish.nysiis,
        argument_names=['nysiis', 'nyssis'],
    ),
    dict(
        name='Metaphone',
        callback=jellyfish.metaphone,
        argument_names=['metaphone'],
    ),
    dict(
        name='Match Rating',
        callback=jellyfish.match_rating_codex,
        argument_names=[
            'match_rating',
            'match rating',
            'matchrating',
            'match_rating_codex',
            'matchratingcodex',
        ],
    ),
]


def _list_phonetic_algorithms():
    """Return the first argument name of each registered phonetic algorithm."""

    primary_names = []
    for entry in _phonetic_algorithms:
        # The first spelling in `argument_names` is the one reported.
        primary_names.append(entry['argument_names'][0])
    return primary_names
github unitedstates / python-us / us / models.py View on Github external
# List expected properties out verbosely, to ensure that
        # this list is consistent with every object in data.py
        self.fips = fips
        self.name = name
        self.abbr = abbr
        self.is_territory = is_territory
        self.is_obsolete = is_obsolete
        self.is_contiguous = is_contiguous
        self.is_continental = is_continental
        self.statehood_year = statehood_year
        self.capital = capital
        self.capital_tz = capital_tz
        self.ap_abbr = ap_abbr
        self.time_zones = time_zones

        self.name_metaphone = jellyfish.metaphone(self.name)