How to use the langcodes.__init__.Language.get function in langcodes

To help you get started, we've selected a few langcodes examples based on popular ways the library is used in public projects.

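For example, Language.get parses a BCP 47 language tag into a Language object. A minimal sketch, assuming langcodes is installed:

import langcodes

lang = langcodes.Language.get('en-US')
print(lang.language)  # 'en'
print(str(lang))      # 'en-US'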

github LuminosoInsight / langcodes / langcodes / __init__.py
def __contains__(self, key):
        return key in self.ATTRIBUTES and getattr(self, key)

    def __repr__(self):
        items = []
        for attr in self.ATTRIBUTES:
            if getattr(self, attr):
                items.append('{0}={1!r}'.format(attr, getattr(self, attr)))
        return "Language.make({})".format(', '.join(items))

    def __str__(self):
        return self.to_tag()


# Make the get(), find(), and find_name() functions available at the top level
get = Language.get
find = Language.find
find_name = Language.find_name

# Make the Language object available under the old name LanguageData
LanguageData = Language
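
# A quick sketch of using these aliases (illustrative values, assuming a
# standard langcodes install):
#
#     >>> get('fr-CA')    # equivalent to Language.get('fr-CA')
#     >>> find('french')  # look up a Language by its name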


def standardize_tag(tag: {str, Language}, macro: bool=False) -> str:
    """
    Standardize a language tag:

    - Replace deprecated values with their updated versions (if those exist)
    - Remove script tags that are redundant with the language
    - If *macro* is True, use a macrolanguage to represent the most common
      standardized language within that macrolanguage. For example, 'cmn'
  (Mandarin) becomes 'zh' (Chinese), and 'arb' (Modern Standard Arabic)
  becomes 'ar' (Arabic).
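
A minimal sketch of calling standardize_tag, with expected values following the rules described in the docstring (exact output can vary between langcodes versions):

from langcodes import standardize_tag

standardize_tag('iw')               # 'he': 'iw' is a deprecated tag for Hebrew
standardize_tag('en-Latn')          # 'en': the Latin script is redundant for English
standardize_tag('cmn', macro=True)  # 'zh': Mandarin collapses to the Chinese macrolanguage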
github LuminosoInsight / langcodes / langcodes / __init__.py

    Comparing Swiss German ('gsw') to standardized German ('de') shows how
    these scores can be asymmetrical. Swiss German speakers will understand
    German, so the score in that direction is 92. Most German speakers find
    Swiss German unintelligible, and CLDR in fact assigns this a score of 16.

    This seems a little bit extreme, but the asymmetry is certainly there. And
    if your text is tagged as 'gsw', it must be that way for a reason.

    >>> tag_match_score('gsw', 'de')
    92
    >>> tag_match_score('de', 'gsw')
    16
    """
    desired_ld = Language.get(desired)
    supported_ld = Language.get(supported)
    return desired_ld.match_score(supported_ld)
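
Calling the function directly reproduces the doctest above. A sketch, noting that tag_match_score has been deprecated in newer langcodes releases:

from langcodes import tag_match_score

tag_match_score('gsw', 'de')  # 92: Swiss German speakers understand standard German
tag_match_score('de', 'gsw')  # 16: the reverse direction scores far lower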
github LuminosoInsight / langcodes / langcodes / __init__.py
"""

        # No matter what form of language we got, normalize it to a single
        # language subtag
        if isinstance(language, Language):
            language = language.language
        elif isinstance(language, str):
            language = get(language).language
        if language is None:
            language = 'und'

        code = name_to_code(tagtype, name, language)
        if code is None:
            raise LookupError("Can't find any %s named %r" % (tagtype, name))
        if '-' in code:
            return Language.get(code)
        else:
            data = {tagtype: code}
            return Language.make(**data)
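
A sketch of how this lookup behaves through the top-level find_name alias (the example names are illustrative; a failed lookup raises LookupError, as the code above shows):

import langcodes

langcodes.find_name('language', 'french', 'en')          # Language.make(language='fr')
langcodes.find_name('language', 'not a language', 'en')  # raises LookupError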
github LuminosoInsight / langcodes / langcodes / __init__.py
# case normalization that comes from parse_tag() hasn't been applied
        # yet.
        tag_lower = tag.lower()
        if normalize and tag_lower in LANGUAGE_REPLACEMENTS:
            tag = LANGUAGE_REPLACEMENTS[tag_lower]

        components = parse_tag(tag)

        for typ, value in components:
            if typ == 'extlang' and normalize and 'language' in data:
                # smash extlangs when possible
                minitag = '%s-%s' % (data['language'], value)
                norm = LANGUAGE_REPLACEMENTS.get(minitag.lower())
                if norm is not None:
                    data.update(
                        Language.get(norm, normalize).to_dict()
                    )
                else:
                    data.setdefault('extlangs', []).append(value)
            elif typ in {'extlang', 'variant', 'extension'}:
                data.setdefault(typ + 's', []).append(value)
            elif typ == 'language':
                if value == 'und':
                    pass
                elif normalize:
                    replacement = LANGUAGE_REPLACEMENTS.get(value.lower())
                    if replacement is not None:
                        # parse the replacement if necessary -- this helps with
                        # Serbian and Moldovan
                        data.update(
                            Language.get(replacement, normalize).to_dict()
                        )
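
This normalization is what lets Language.get accept deprecated or extlang-style tags. A sketch, assuming the replacement tables shipped with langcodes:

import langcodes

langcodes.Language.get('iw')      # deprecated tag for Hebrew, normalized to 'he'
langcodes.Language.get('zh-cmn')  # the extlang 'cmn' is smashed into the language tag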