How to use the normality.normalize function in normality

To help you get started, we’ve selected a few normality examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pudo / normality / tests / test_normality.py View on Github external
def test_empty(self):
        """Check that empty input comes back as ``None`` from each helper."""
        # Every helper under test is expected to map None to None.
        for helper in (slugify, ascii_text, latinize_text, normalize):
            self.assertEqual(None, helper(None))
        # normalize is additionally expected to treat blank strings as empty.
        for blank in ('', ' '):
            self.assertEqual(None, normalize(blank))
github alephdata / followthemoney / followthemoney / compare.py View on Github external
def compare_names(left, right):
    """Return the best similarity score between the names of two objects.

    Each entry of ``left.names`` is paired with each entry of ``right.names``
    after normalisation with ``latinize=True``. A pair is scored as its
    ``jaro`` similarity multiplied by ``dampen(2, 20, shortest(a, b))``; the
    highest score over all pairs is returned, or ``0`` when there is no pair
    to compare.
    """
    result = 0
    left_list = [normalize(n, latinize=True) for n in left.names]
    right_list = [normalize(n, latinize=True) for n in right.names]
    # normalize() returns None for empty or blank input; such names carry
    # nothing to compare, so keep them away from the string metrics.
    left_list = [n for n in left_list if n is not None]
    right_list = [n for n in right_list if n is not None]
    # Distinct loop names: the parameters ``left``/``right`` are not rebound.
    for (left_name, right_name) in itertools.product(left_list, right_list):
        similarity = jaro(left_name, right_name)
        score = similarity * dampen(2, 20, shortest(left_name, right_name))
        result = max(result, score)
    return result
github alephdata / aleph / aleph / logic / names.py View on Github external
def name_tokens(name):
    """Return the tokens of a normalised name that are longer than one character.

    The name is normalised with ``ascii=True`` and split on single spaces.
    An empty list is returned when normalisation yields ``None``.
    """
    normalized = normalize(name, ascii=True)
    if normalized is not None:
        return [token for token in normalized.split(' ') if len(token) > 1]
    return []
github alephdata / aleph / services / extract-entities / entityextractor / prepare_geonames.py View on Github external
def prepare_geonames():
    """Build the shelf at GEONAMES_DB_PATH from the tab-separated raw file.

    First pass: for every row, the normalised value of column 8 is recorded
    against each normalised name taken from columns 1, 2 and the
    comma-separated column 3. Second pass: each name's list is replaced by
    the single value that occurs most often in it.
    """
    with io.open(GEONAMES_RAW_PATH, 'r', encoding='utf-8') as fh:
        with shelve.open(GEONAMES_DB_PATH) as db:
            for row in csv.reader(fh, delimiter='\t'):
                country = normalize(row[8])
                if country is None:
                    # Rows without a usable country value are skipped.
                    continue
                names = set(row[3].split(','))
                names.update((row[1], row[2]))
                for raw_name in names:
                    key = normalize(raw_name)
                    if key is None:
                        continue
                    known = db.get(key)
                    if not known:
                        db[key] = [country]
                        continue
                    # Re-assign after appending so the change is written
                    # back to the shelf.
                    known.append(country)
                    db[key] = known
            # Collapse every list to its most frequent entry.
            for key in db:
                votes = db[key]
                db[key] = max(set(votes), key=votes.count)
github alephdata / aleph / contrib / play / esnames.py View on Github external
def name_tokens(name):
    """Split a name into its normalised, non-empty tokens.

    The name is normalised with ``latinize=True`` and split on single
    spaces; empty fragments are dropped. An empty list is returned when
    normalisation yields ``None``.
    """
    name = normality.normalize(name, latinize=True)
    if name is None:
        # normalize() returns None for empty or blank input; calling
        # .split() on that would raise AttributeError.
        return []
    return [n for n in name.split(' ') if n]
github alephdata / countrynames / countrynames / __init__.py View on Github external
def _normalize_name(country):
    """Return the country name normalised (with latinisation) for comparison."""
    cleaned = normalize(country, latinize=True)
    return cleaned
github pudo / jsongraph / jsongraph / query / old_builder.py View on Github external
def filter_value(self, q, filter_stmt):
        """Apply this node's value comparison to the query and return it.

        The operator stored on ``self.node.op`` selects the filter: equality,
        inequality, membership, non-membership, or a substring LIKE match
        against the ``normalized`` column. For any other operator the query
        is returned unchanged.
        """
        op = self.node.op
        if op == OP_EQ:
            return q.filter(filter_stmt._value == self.node.value)
        if op == OP_NOT:
            return q.filter(filter_stmt._value != self.node.value)
        if op == OP_IN:
            return q.filter(filter_stmt._value.in_(self.node.data))
        if op == OP_NIN:
            return q.filter(~filter_stmt._value.in_(self.node.data))
        if op == OP_LIKE:
            # Wrap the normalised value in wildcards for a substring match.
            # NOTE(review): if normalize() returns None the pattern becomes
            # '%None%' - confirm whether that case can occur here.
            pattern = '%%%s%%' % normalize(self.node.value)
            return q.filter(filter_stmt.normalized.like(pattern))
        return q
github alephdata / aleph / aleph / analyze / corasick_entity.py View on Github external
def match_form(self, text):
        """Turn a string into a form appropriate for name matching."""
        # Readability of the result does not matter here: the output is only
        # meant for comparisons and machine analysis.
        form = normalize(text, lowercase=True, latinize=True)
        # TODO: this is a weird heuristic, but to avoid overly aggressive
        # matching it may make sense: single-word forms are rejected along
        # with empty ones.
        if form is None or ' ' not in form:
            return None
        return form
github pudo / typecast / typecast / util.py View on Github external
def match_prefix(self, prefix):
        """Return True if the normalised name or label starts with the prefix.

        The prefix is normalised before comparison. Objects whose
        ``abstract`` attribute is truthy never match.
        """
        needle = normalize(prefix)
        if self.abstract:
            return False
        # The label is only consulted when the name does not match.
        return (normalize(self.name).startswith(needle)
                or normalize(self.label).startswith(needle))
github occrp / cronosparser / cronos / parser.py View on Github external
def make_csv_file_name(meta, table, out_folder):
    """Build the CSV output path for a table inside ``out_folder``.

    The file name has the form ``<bank> - <abbr> - <name>.csv``, where the
    parts are the case-preserving normalised forms of ``meta['BankName']``,
    ``table['abbr']`` and ``table['name']``. A bank name that normalises to
    ``None`` is replaced by ``'Untitled Database'``.
    """
    bank_name, table_abbr, table_name = (
        normalize(meta['BankName'], lowercase=False),
        normalize(table['abbr'], lowercase=False),
        normalize(table['name'], lowercase=False),
    )
    if bank_name is None:
        bank_name = 'Untitled Database'
    parts = (bank_name, table_abbr, table_name)
    return os.path.join(out_folder, '%s - %s - %s.csv' % parts)