How to use the normality.ascii_text function in normality

To help you get started, we’ve selected a few normality examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pudo / normality / tests / test_normality.py View on Github external
def test_ahmad(self):
        # The Azeri schwa ('ə') should transliterate to a plain 'a'.
        transliterated = ascii_text(u'əhməd')
        self.assertEqual('ahmad', transliterated)
github pudo / normality / tests / test_normality.py View on Github external
def test_empty(self):
        # Every normalisation helper maps a missing value to None.
        for helper in (slugify, ascii_text, latinize_text, normalize):
            self.assertEqual(None, helper(None))
        # Whitespace-only strings normalise to nothing as well.
        self.assertEqual(None, normalize(''))
        self.assertEqual(None, normalize(' '))
github pudo / normality / tests / test_normality.py View on Github external
def test_azeri(self):
        # A full upper-case Azeri name should fold down to plain ASCII.
        folded = ascii_text(u'FUAD ALIYEV ƏHMƏD OĞLU')
        self.assertEqual('FUAD ALIYEV AHMAD OGLU', folded)
github alephdata / aleph / aleph / search / entities.py View on Github external
# NOTE(review): this snippet starts mid-function — the `def` line and the
# opening of the `q` dict are not visible here.
'match_phrase_prefix': {'name': prefix.strip()}
        }
        # Optionally narrow the suggestions to the requested schemata.
        if schemas is not None and len(schemas):
            q = add_filter(q, {'terms': {'schema': schemas}})

        # TODO: is this correct? should we allow filter by dataset entities?
        # XXXX broken use $physical
        q = add_filter(q, {'terms': {'collection_id': authz.collections_read}})

        # Wrap the filtered query; most-referenced entities rank first,
        # relevance score breaks ties.
        q = {
            'size': size,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': q,
            '_source': ['name', 'schema', 'fingerprints', 'doc_count']
        }
        # ASCII-fold the prefix so comparison below is accent-insensitive.
        ref = ascii_text(prefix)
        result = es.search(index=es_index, doc_type=TYPE_ENTITY, body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            # 'match' flags an exact hit between the folded prefix and any
            # of the entity's folded fingerprints.
            terms = [ascii_text(t) for t in ent.pop('fingerprints', [])]
            ent['match'] = ref in terms
            ent['score'] = res.get('_score')
            ent['id'] = res.get('_id')
            options.append(ent)
    return {
        'prefix': prefix,
        'results': options
    }
github alephdata / aleph / services / ingest-file / ingestors / support / email.py View on Github external
def __init__(self, manager, name, email):
        # NOTE(review): `manager` is unused in the lines shown here; the
        # method likely continues beyond this snippet — confirm upstream.
        # Normalise the raw address to plain ASCII before validating it.
        self.email = ascii_text(stringify(email))
        self.name = stringify(name)
        # Drop the email if it does not validate as an address.
        if not registry.email.validate(self.email):
            self.email = None
        # Sometimes the "name" field actually carries the address.
        if registry.email.validate(self.name):
            self.email = self.email or ascii_text(self.name)
            self.name = None

        # This should be using formataddr, but I cannot figure out how
        # to use that without encoding the name.
        # Build a display label from whichever parts survived validation.
        self.label = None
        if self.name is not None and self.email is not None:
            self.label = '%s <%s>' % (self.name, self.email)
        elif self.name is None and self.email is not None:
            self.label = self.email
        elif self.email is None and self.name is not None:
            self.label = self.name
github alephdata / aleph / aleph / search / entities.py View on Github external
# TODO: is this correct? should we allow filter by dataset entities?
        # XXXX broken use $physical
        # NOTE(review): duplicate of the fragment above; it also starts
        # mid-function (`q`, `size`, `options` are defined off-screen).
        q = add_filter(q, {'terms': {'collection_id': authz.collections_read}})

        # Wrap the filtered query; most-referenced entities rank first,
        # relevance score breaks ties.
        q = {
            'size': size,
            'sort': [{'doc_count': 'desc'}, '_score'],
            'query': q,
            '_source': ['name', 'schema', 'fingerprints', 'doc_count']
        }
        # ASCII-fold the prefix so comparison below is accent-insensitive.
        ref = ascii_text(prefix)
        result = es.search(index=es_index, doc_type=TYPE_ENTITY, body=q)
        for res in result.get('hits', {}).get('hits', []):
            ent = res.get('_source')
            # 'match' flags an exact hit between the folded prefix and any
            # of the entity's folded fingerprints.
            terms = [ascii_text(t) for t in ent.pop('fingerprints', [])]
            ent['match'] = ref in terms
            ent['score'] = res.get('_score')
            ent['id'] = res.get('_id')
            options.append(ent)
    return {
        'prefix': prefix,
        'results': options
    }
github alephdata / aleph / services / ingest-file / ingestors / support / email.py View on Github external
def __init__(self, manager, name, email):
        # Normalise the raw address to plain ASCII before validating it.
        self.email = ascii_text(stringify(email))
        self.name = stringify(name)
        # Drop the email if it does not validate as an address.
        if not registry.email.validate(self.email):
            self.email = None
        # Sometimes the "name" field actually carries the address.
        if registry.email.validate(self.name):
            self.email = self.email or ascii_text(self.name)
            self.name = None

        # This should be using formataddr, but I cannot figure out how
        # to use that without encoding the name.
        # Build a display label from whichever parts survived validation.
        self.label = None
        if self.name is not None and self.email is not None:
            self.label = '%s <%s>' % (self.name, self.email)
        elif self.name is None and self.email is not None:
            self.label = self.email
        elif self.email is None and self.name is not None:
            self.label = self.name

        # Derive a stable key/fragment for the entity, if we kept an email.
        self.entity = None
        if self.email is not None:
            key = self.email.lower().strip()
            fragment = safe_fragment(self.label)
            # NOTE(review): snippet is cut off here — `key`/`fragment` are
            # consumed by code beyond this excerpt.
github alephdata / aleph / aleph / search / fragments.py View on Github external
def text_query_string(text, literal=False):
    """Build an Elasticsearch ``query_string`` clause over the 'text' field.

    Empty or missing input degrades to a match-all query. When ``literal``
    is set, the text is ASCII-folded and wrapped in double quotes so it is
    matched as an exact phrase rather than free-form query syntax.
    """
    if text is None or not len(text.strip()):
        return match_all()
    if literal:
        text = '"%s"' % ascii_text(text)
    return {
        'query_string': {
            'query': text,
            'fields': ['text'],
            'default_operator': 'AND',
            'use_dis_max': True
        }
    }  # fix: the closing brace of the returned dict was missing in the snippet
github alephdata / aleph / aleph / schema / types.py View on Github external
def normalize_value(self, value):
        """Collapse runs of whitespace, then return both the cleaned
        value and its ASCII transliteration."""
        cleaned = collapse_spaces(value)
        return cleaned, ascii_text(cleaned)
github alephdata / aleph / aleph / datasets / util.py View on Github external
# Add inverted properties. This takes all the properties
        # of a specific type (names, dates, emails etc.)
        # NOTE(review): this snippet starts mid-function — `prop`, `values`,
        # `data` and `schema` are bound off-screen.
        invert = prop.type.index_invert
        if invert:
            if invert not in data:
                data[invert] = []
            # Collect normalised values under the inverted field, deduped.
            for norm in prop.type.normalize(values):
                if norm not in data[invert]:
                    data[invert].append(norm)

    data['fingerprints'] = list(set(data['fingerprints']))

    # Add latinised names
    # NOTE(review): ascii_text(name) may yield a non-name (e.g. None) for
    # some inputs, which would then land in 'names' — confirm upstream.
    names = data.get('names', [])
    for name in list(names):
        names.append(ascii_text(name))
    data['names'] = list(set(names))

    # Get implied schemata (i.e. parents of the actual schema)
    data['schema'] = schema.name
    data['schemata'] = [p.name for p in schema.schemata if not p.hidden]

    # Second name field for non-tokenised sorting.
    if 'name' in data:
        data['name_sort'] = data.get('name')
    return data