How to use langcodes - 9 common examples

To help you get started, we've selected a few langcodes examples based on popular ways it's used in public projects.
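
Before working through the examples, here is a minimal sketch of the entry points they exercise. The expected outputs in the comments come from the doctests quoted in the examples below, and may shift between langcodes versions:

import langcodes

# Normalize a messy tag to standard BCP 47 form (Example 7)
print(langcodes.standardize_tag('spa-latn-mx'))     # 'es-MX'

# Score how well a supported language serves a desired one (Example 4)
print(langcodes.tag_match_score('gsw', 'de'))       # 92

# Fill in the likely script and region subtags (Example 8)
print(langcodes.Language.get('und-CH').maximize())  # de-Latn-CH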

Example 1: dimagi/commcare-hq (corehq/apps/app_manager/app_strings.py)
numeric_nav_on = app.profile.get('properties', {}).get('cc-entry-mode') == 'cc-entry-review'
            if app.profile.get('features', {}).get('sense') == 'true' or numeric_nav_on:
                text = "${0} %s" % (text,) if not (text and text[0].isdigit()) else text
        return text

    langs = [lang] + app.langs
    yield id_strings.homescreen_title(), app.name
    yield id_strings.app_display_name(), app.name

    yield 'cchq.case', "Case"
    yield 'cchq.referral', "Referral"

    if for_default:
        # include language code names and current language
        for lc in app.langs:
            # fall back to the raw code when no display name is known;
            # the 'or lc' fallback guarantees a non-empty name here
            name = langcodes.get_name(lc) or lc
            with localize(convert_to_two_letter_code(lc)):
                name = ugettext(name)
            yield lc, name

    yield id_strings.current_language(), lang

    for module in app.get_modules():
        for detail_type, detail, _ in module.get_details():
            for column in detail.get_columns():
                yield id_strings.detail_column_header_locale(module, detail_type, column), trans(column.header)

                if column.format in ('enum', 'enum-image', 'conditional-enum'):
                    for item in column.enum:
                        yield id_strings.detail_column_enum_variable(
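
The excerpt resolves display names through a project-local langcodes.get_name helper, falling back to the raw code. With the langcodes library itself, a similar lookup might look like the sketch below; language_name() is an assumption about the library's API and does not appear in the excerpts on this page:

import langcodes

def language_display_names(codes):
    # Hypothetical helper, not part of commcare-hq or langcodes itself
    for lc in codes:
        try:
            # language_name() is assumed from the langcodes API
            name = langcodes.Language.get(lc).language_name()
        except ValueError:
            # unparseable tags fall back to the raw code
            name = lc
        yield lc, name

for code, name in language_display_names(['en', 'fra', 'hin']):
    print(code, name)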
Example 2: LuminosoInsight/langcodes (langcodes/tag_parser.py)
def parse_tag(tag):
    """
    Parse the syntax of a language tag, without looking up anything in the
    registry, yet. Returns a list of (type, value) tuples indicating what
    information will need to be looked up.
    """
    tag = normalize_characters(tag)
    if tag in EXCEPTIONS:
        return [('grandfathered', tag)]
    else:
        # The first subtag is always either the language code, or 'x' to mark
        # the entire tag as private-use. Other subtags are distinguished
        # by their length and format, but the language code is distinguished
        # entirely by the fact that it is required to come first.
        subtags = tag.split('-')
        if subtags[0] == 'x':
            if len(subtags) == 1:
                raise LanguageTagError("'x' is not a language tag on its own")
            else:
                # the entire language tag is private use, but we know that,
                # whatever it is, it fills the "language" slot
                return [('language', tag)]
        elif len(subtags[0]) >= 2:
            return [('language', subtags[0])] + parse_subtags(subtags[1:])
        else:
            subtag_error(subtags[0], 'a language code')
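
Since this example comes from langcodes/tag_parser.py, parse_tag and LanguageTagError should be importable from that module (the traceback in Example 7 confirms the exception's location). A quick sketch of the output; the values are not asserted because their exact casing may vary by version:

from langcodes.tag_parser import parse_tag, LanguageTagError

# Each subtag is classified by position and shape, with no registry lookups,
# e.g. something like [('language', 'en'), ('script', ...), ('region', ...)]
print(parse_tag('en-Latn-US'))

# A bare 'x' is rejected, as the branch above shows
try:
    parse_tag('x')
except LanguageTagError as err:
    print(err)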
Example 3: LuminosoInsight/langcodes (langcodes/__init__.py)
def __contains__(self, key):
        return key in self.ATTRIBUTES and getattr(self, key)

    def __repr__(self):
        items = []
        for attr in self.ATTRIBUTES:
            if getattr(self, attr):
                items.append('{0}={1!r}'.format(attr, getattr(self, attr)))
        return "Language.make({})".format(', '.join(items))

    def __str__(self):
        return self.to_tag()


# Make the get(), find(), and find_name() functions available at the top level
get = Language.get
find = Language.find
find_name = Language.find_name

# Make the Language object available under the old name LanguageData
LanguageData = Language


def standardize_tag(tag: {str, Language}, macro: bool=False) -> str:
    """
    Standardize a language tag:

    - Replace deprecated values with their updated versions (if those exist)
    - Remove script tags that are redundant with the language
    - If *macro* is True, use a macrolanguage to represent the most common
      standardized language within that macrolanguage. For example, 'cmn'
      (Mandarin) becomes 'zh' (Chinese), and 'arb' (Modern Standard Arabic)
      becomes 'ar' (Arabic).
    """
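
The module-level aliases mean you rarely need to touch the class directly, and older code written against LanguageData keeps working. A minimal sketch; the repr format follows the __repr__ method shown above:

import langcodes

# The top-level get() is the same callable as Language.get()
lang = langcodes.get('fr')
print(repr(lang))    # e.g. Language.make(language='fr')

# LanguageData survives as a backward-compatible alias
assert langcodes.LanguageData is langcodes.Language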
Example 4: LuminosoInsight/langcodes (langcodes/__init__.py)
def tag_match_score(desired: {str, Language}, supported: {str, Language}) -> int:
    """
    Comparing Swiss German ('gsw') to standardized German ('de') shows how
    these scores can be asymmetrical. Swiss German speakers will understand
    German, so the score in that direction is 92. Most German speakers find
    Swiss German unintelligible, and CLDR in fact assigns this a score of 16.

    This seems a little bit extreme, but the asymmetry is certainly there. And
    if your text is tagged as 'gsw', it must be that way for a reason.

    >>> tag_match_score('gsw', 'de')
    92
    >>> tag_match_score('de', 'gsw')
    16
    """
    desired_ld = Language.get(desired)
    supported_ld = Language.get(supported)
    return desired_ld.match_score(supported_ld)
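
A common use of these scores is picking the best supported language for a user. This hypothetical helper is not part of langcodes itself (the library may ship its own matching functions); it simply ranks candidates with tag_match_score:

import langcodes

def pick_best_language(desired, supported, cutoff=50):
    # Hypothetical helper: return the best-scoring supported tag,
    # or None if nothing beats the cutoff
    best_tag, best_score = None, cutoff
    for tag in supported:
        score = langcodes.tag_match_score(desired, tag)
        if score > best_score:
            best_tag, best_score = tag, score
    return best_tag

print(pick_best_language('gsw', ['de', 'fr', 'it']))  # 'de', scoring 92 per the doctest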
Example 5: LuminosoInsight/langcodes (langcodes/__init__.py)
        for typ, value in components:
            if typ == 'extlang' and normalize and 'language' in data:
                # smash extlangs when possible
                minitag = '%s-%s' % (data['language'], value)
                norm = LANGUAGE_REPLACEMENTS.get(minitag.lower())
                if norm is not None:
                    data.update(
                        Language.get(norm, normalize).to_dict()
                    )
                else:
                    data.setdefault('extlangs', []).append(value)
            elif typ in {'extlang', 'variant', 'extension'}:
                data.setdefault(typ + 's', []).append(value)
            elif typ == 'language':
                if value == 'und':
                    pass
                elif normalize:
                    replacement = LANGUAGE_REPLACEMENTS.get(value.lower())
                    if replacement is not None:
                        # parse the replacement if necessary -- this helps with
                        # Serbian and Moldovan
                        data.update(
                            Language.get(replacement, normalize).to_dict()
                        )
                    else:
                        data['language'] = value
                else:
                    data['language'] = value
            elif typ == 'region':
                if normalize:
                    data['region'] = REGION_REPLACEMENTS.get(value.lower(), value)
                else:
                    data['region'] = value
            elif typ == 'grandfathered':
                # If we got here, we got a grandfathered tag but we were asked
                # not to normalize it, or the CLDR data doesn't know how to
                # normalize it. The best we can do is set the entire tag as the
                # language.
                data['language'] = value
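
To see the replacement tables in action, assuming the usual CLDR alias data is loaded (for instance the retired code 'iw' for Hebrew), normalization should behave roughly like this:

import langcodes

# normalize=True applies LANGUAGE_REPLACEMENTS, so the deprecated 'iw'
# should come back as 'he' if the CLDR alias data includes it
print(langcodes.Language.get('iw', normalize=True))

# normalize=False keeps the tag as written
print(langcodes.Language.get('iw', normalize=False))  # iw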
Example 6: LuminosoInsight/langcodes (langcodes/__init__.py)
"""

        # No matter what form of language we got, normalize it to a single
        # language subtag
        if isinstance(language, Language):
            language = language.language
        elif isinstance(language, str):
            language = get(language).language
        if language is None:
            language = 'und'

        code = name_to_code(tagtype, name, language)
        if code is None:
            raise LookupError("Can't find any %s named %r" % (tagtype, name))
        if '-' in code:
            return Language.get(code)
        else:
            data = {tagtype: code}
            return Language.make(**data)
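
This lookup backs Language.find and find_name, which Example 3 exposed at the top level. A sketch, on the assumption that find accepts a plain English name and defaults to searching language names:

import langcodes

try:
    lang = langcodes.find('french')  # top-level alias for Language.find
    print(lang)                      # expected: fr
except LookupError as err:
    # raised exactly as in the excerpt above when nothing matches
    print(err)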
Example 7: LuminosoInsight/langcodes (langcodes/__init__.py)
    >>> standardize_tag('ja-latn-hepburn')
    'ja-Latn-hepburn'

    >>> standardize_tag('spa-latn-mx')
    'es-MX'

    If the tag can't be parsed according to BCP 47, this will raise a
    LanguageTagError (a subclass of ValueError):

    >>> standardize_tag('spa-mx-latn')
    Traceback (most recent call last):
        ...
    langcodes.tag_parser.LanguageTagError: This script subtag, 'latn', is out of place. Expected variant, extension, or end of string.
    """
    langdata = Language.get(tag, normalize=True)
    if macro:
        langdata = langdata.prefer_macrolanguage()

    return langdata.simplify_script().to_tag()
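
Combined with the macro flag described in Example 3's docstring, standardize_tag can fold Mandarin into the Chinese macrolanguage. The outputs are left unasserted here, since simplify_script may also drop a script subtag that is the default for the language:

from langcodes import standardize_tag

# The specific language code is kept when macro is False (the default)
print(standardize_tag('cmn-hans-cn'))

# macro=True represents Mandarin by its macrolanguage 'zh',
# per the cmn -> zh example in the docstring
print(standardize_tag('cmn-hans-cn', macro=True))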
Example 8: LuminosoInsight/langcodes (langcodes/__init__.py)
    def maximize(self) -> 'Language':
        """
        >>> str(Language.get('und-Arab').maximize())
        'ar-Arab-EG'
        >>> str(Language.get('und-CH').maximize())
        'de-Latn-CH'
        >>> str(Language.make().maximize())    # 'MURICA.
        'en-Latn-US'
        >>> str(Language.get('und-ibe').maximize())
        'en-ibe-Latn-US'
        """
        if self._filled is not None:
            return self._filled

        for broader in self.broaden():
            tag = broader.to_tag()
            if tag in LIKELY_SUBTAGS:
                result = Language.get(LIKELY_SUBTAGS[tag], normalize=False)
                result = result.update(self)
                self._filled = result
                return result

        raise RuntimeError(
            "Couldn't fill in likely values. This represents a problem with "
            "the LIKELY_SUBTAGS data."
Example 9: LuminosoInsight/langcodes (langcodes/__init__.py)
        data = {}

        # Smash case before consulting the replacement table, because the
        # case normalization that comes from parse_tag() hasn't been applied
        # yet.
        tag_lower = tag.lower()
        if normalize and tag_lower in LANGUAGE_REPLACEMENTS:
            tag = LANGUAGE_REPLACEMENTS[tag_lower]

        components = parse_tag(tag)

        for typ, value in components:
            if typ == 'extlang' and normalize and 'language' in data:
                # smash extlangs when possible
                minitag = '%s-%s' % (data['language'], value)
                norm = LANGUAGE_REPLACEMENTS.get(minitag.lower())
                if norm is not None:
                    data.update(
                        Language.get(norm, normalize).to_dict()
                    )
                else:
                    data.setdefault('extlangs', []).append(value)
            elif typ in {'extlang', 'variant', 'extension'}:
                data.setdefault(typ + 's', []).append(value)
            elif typ == 'language':
                if value == 'und':
                    pass
                elif normalize:
                    replacement = LANGUAGE_REPLACEMENTS.get(value.lower())
                    if replacement is not None:
                        # parse the replacement if necessary -- this helps with
                        # Serbian and Moldovan
                        data.update(
                            Language.get(replacement, normalize).to_dict()
                        )
                    else:
                        data['language'] = value
                else:
                    data['language'] = value
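
Putting the branches together, here is how the extlang 'smashing' should look from the outside, assuming the replacement table carries CLDR's usual 'zh-cmn' alias:

import langcodes

# The extlang branch can smash 'zh-cmn' down to a single language subtag
print(langcodes.Language.get('zh-cmn', normalize=True))   # expected: cmn

# Without normalization, the extlang is kept as parsed
print(langcodes.Language.get('zh-cmn', normalize=False))  # e.g. zh-cmn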