How to use the langcodes.__init__.Language class in langcodes

To help you get started, we’ve selected a few langcodes examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
Language.get(norm, normalize).to_dict()
                    )
                else:
                    data.setdefault('extlangs', []).append(value)
            elif typ in {'extlang', 'variant', 'extension'}:
                data.setdefault(typ + 's', []).append(value)
            elif typ == 'language':
                if value == 'und':
                    pass
                elif normalize:
                    replacement = LANGUAGE_REPLACEMENTS.get(value.lower())
                    if replacement is not None:
                        # parse the replacement if necessary -- this helps with
                        # Serbian and Moldovan
                        data.update(
                            Language.get(replacement, normalize).to_dict()
                        )
                    else:
                        data['language'] = value
                else:
                    data['language'] = value
            elif typ == 'region':
                if normalize:
                    data['region'] = REGION_REPLACEMENTS.get(value.lower(), value)
                else:
                    data['region'] = value
            elif typ == 'grandfathered':
                # If we got here, we got a grandfathered tag but we were asked
                # not to normalize it, or the CLDR data doesn't know how to
                # normalize it. The best we can do is set the entire tag as the
                # language.
                data['language'] = value
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
>>> standardize_tag('ja-latn-hepburn')
    'ja-Latn-hepburn'

    >>> standardize_tag('spa-latn-mx')
    'es-MX'

    If the tag can't be parsed according to BCP 47, this will raise a
    LanguageTagError (a subclass of ValueError):

    >>> standardize_tag('spa-mx-latn')
    Traceback (most recent call last):
        ...
    langcodes.tag_parser.LanguageTagError: This script subtag, 'latn', is out of place. Expected variant, extension, or end of string.
    """
    langdata = Language.get(tag, normalize=True)
    if macro:
        langdata = langdata.prefer_macrolanguage()

    return langdata.simplify_script().to_tag()
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
>>> str(Language.get('und-Arab').maximize())
        'ar-Arab-EG'
        >>> str(Language.get('und-CH').maximize())
        'de-Latn-CH'
        >>> str(Language.make().maximize())    # 'MURICA.
        'en-Latn-US'
        >>> str(Language.get('und-ibe').maximize())
        'en-ibe-Latn-US'
        """
        if self._filled is not None:
            return self._filled

        for broader in self.broaden():
            tag = broader.to_tag()
            if tag in LIKELY_SUBTAGS:
                result = Language.get(LIKELY_SUBTAGS[tag], normalize=False)
                result = result.update(self)
                self._filled = result
                return result

        raise RuntimeError(
            "Couldn't fill in likely values. This represents a problem with "
            "the LIKELY_SUBTAGS data."
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
for attr in self.ATTRIBUTES:
            if getattr(self, attr):
                items.append('{0}={1!r}'.format(attr, getattr(self, attr)))
        return "Language.make({})".format(', '.join(items))

    def __str__(self):
        """Return the string form of this object, which is the result of to_tag()."""
        return self.to_tag()


# Make the get(), find(), and find_name() functions available at the top level
# of the module, so they can be called without naming the Language class.
get = Language.get
find = Language.find
find_name = Language.find_name

# Keep the old name LanguageData available as an alias for the Language class.
LanguageData = Language


def standardize_tag(tag: {str, Language}, macro: bool=False) -> str:
    """
    Standardize a language tag:

    - Replace deprecated values with their updated versions (if those exist)
    - Remove script tags that are redundant with the language
    - If *macro* is True, use a macrolanguage to represent the most common
      standardized language within that macrolanguage. For example, 'cmn'
      (Mandarin) becomes 'zh' (Chinese), and 'arb' (Modern Standard Arabic)
      becomes 'ar' (Arabic).
    - Format the result according to the conventions of BCP 47

    Macrolanguage replacement is not required by BCP 47, but it is required
    by the Unicode CLDR.
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
def _get_name(self, attribute: str, language, min_score: int):
        """
        Look up a name for the value stored in one attribute of this object.

        `attribute` must be one of self.ATTRIBUTES. `language` may be given
        either as a Language object or as a tag; a Language object is
        converted with to_tag() first. Returns None when the attribute is
        unset (None); otherwise returns whatever self._best_name() selects
        from the candidate names, given `language` and `min_score`.
        """
        assert attribute in self.ATTRIBUTES
        target = language.to_tag() if isinstance(language, Language) else language

        code = getattr(self, attribute)
        if code is None:
            return None

        candidates = code_to_names(attribute, code)
        # Register the raw attribute value itself as the name under 'und',
        # alongside the names returned by code_to_names().
        candidates['und'] = getattr(self, attribute)
        return self._best_name(candidates, target, min_score)
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
def update(self, other: 'Language') -> 'Language':
        """
        Combine this Language with another one, preferring the other's fields.

        For each field, the value from `other` is used when it is truthy;
        otherwise the value from this Language is kept. The merged fields
        are passed to Language.make() and its result is returned.
        """
        field_names = (
            'language', 'extlangs', 'script', 'region',
            'variants', 'extensions', 'private',
        )
        merged = {
            name: getattr(other, name) or getattr(self, name)
            for name in field_names
        }
        return Language.make(**merged)
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
return key in self.ATTRIBUTES and getattr(self, key)

    def __repr__(self):
        """
        Return a string in the form of a Language.make(...) call.

        Only the attributes listed in self.ATTRIBUTES whose values are truthy
        are included, each rendered as name=repr(value), in ATTRIBUTES order.
        """
        pairs = ((attr, getattr(self, attr)) for attr in self.ATTRIBUTES)
        shown = [
            '{0}={1!r}'.format(attr, value)
            for attr, value in pairs
            if value
        ]
        return "Language.make({})".format(', '.join(shown))

    def __str__(self):
        """Return the string form of this object, which is the result of to_tag()."""
        return self.to_tag()


# Make the get(), find(), and find_name() functions available at the top level
# of the module, so they can be called without naming the Language class.
get = Language.get
find = Language.find
find_name = Language.find_name

# Keep the old name LanguageData available as an alias for the Language class.
LanguageData = Language


def standardize_tag(tag: {str, Language}, macro: bool=False) -> str:
    """
    Standardize a language tag:

    - Replace deprecated values with their updated versions (if those exist)
    - Remove script tags that are redundant with the language
    - If *macro* is True, use a macrolanguage to represent the most common
      standardized language within that macrolanguage. For example, 'cmn'
      (Mandarin) becomes 'zh' (Chinese), and 'arb' (Modern Standard Arabic)
      becomes 'ar' (Arabic).
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
>>> Language.find('brazilian portuguese')
        Language.make(language='pt', region='BR')
        >>> Language.find('simplified chinese')
        Language.make(language='zh', script='Hans')

        Some language names are ambiguous: for example, there is a language
        named 'Fala' in English (with code 'fax'), but 'Fala' is also the
        Kwasio word for French. In this case, specifying the language that
        the name is in is necessary for disambiguation.

        >>> Language.find('fala')
        Language.make(language='fr')
        >>> Language.find('fala', 'en')
        Language.make(language='fax')
        """
        return Language.find_name('language', name, language)
github LuminosoInsight / langcodes / langcodes / __init__.py View on Github external
The language tag 'sh' (Serbo-Croatian) ended up being politically
        problematic, and different standards took different steps to address
        this. The IANA made it into a macrolanguage that contains 'sr', 'hr',
        and 'bs'. Unicode further decided that it's a legacy tag that should
        be interpreted as 'sr-Latn', which the language matching rules say
        is mutually intelligible with all those languages.

        We complicate the example by adding on the region tag 'QU', an old
        provisional tag for the European Union, which is now standardized as
        'EU'.

        >>> Language.get('sh-QU')
        Language.make(language='sr', script='Latn', region='EU')
        """
        if isinstance(tag, Language):
            if not normalize:
                # shortcut: we have the tag already
                return tag
            
            # We might need to normalize this tag. Convert it back into a
            # string tag, to cover all the edge cases of normalization in a
            # way that we've already solved.
            tag = tag.to_tag()

        if (tag, normalize) in Language._PARSE_CACHE:
            return Language._PARSE_CACHE[tag, normalize]

        data = {}
        # if the complete tag appears as something to normalize, do the
        # normalization right away. Smash case when checking, because the
        # case normalization that comes from parse_tag() hasn't been applied