How to use the epitran.epihan.Epihan class in epitran

To help you get started, we’ve selected a few epitran examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dmort27 / epitran / epitran / _epitran.py View on Github external
from epitran.epihan import Epihan, EpihanTraditional
from epitran.flite import FliteLexLookup
from epitran.puncnorm import PuncNorm
from epitran.simple import SimpleEpitran
from epitran.xsampa import XSampa

# Python 3 has no built-in ``unicode``; bind an identity stand-in under that
# name so that calls to ``unicode(...)`` still resolve on Python 3.
if sys.version_info[0] == 3:
    def unicode(x):
        """Return ``x`` unchanged (Python 3 stand-in for the ``unicode`` name)."""
        return x

# Suppress all log records at DEBUG severity and below, process-wide.
logging.disable(logging.DEBUG)

class Epitran(object):
    """Unified interface for IPA transliteration/transcription"""
    special = {'eng-Latn': FliteLexLookup,
               'cmn-Hans': Epihan,
               'cmn-Hant': EpihanTraditional}

    def __init__(self, code, preproc=True, postproc=True, ligatures=False, cedict_file=None,
                 rev=False, rev_preproc=True, rev_postproc=True):
        """Construct Epitran transliteration/transcription object

        Args:
            code (str): ISO 639-3 plus "-" plus ISO 15924 code of the
                        language/script pair that should be loaded
            preproc (bool): apply preprocessors
            postproc (bool): apply postprocessors
            ligatures (bool): use precomposed ligatures instead of standard IPA
            cedict_file (str): path to file containing the CC-CEDict
                               dictionary; relevant only for Chinese
            rev (boolean): if True, load reverse transliteration
            rev_preproc (bool): if True, apply preprocessor when reverse transliterating
github dmort27 / epitran / epitran / epihan.py View on Github external
pinyin = u''.join(pinyin).lower()
                ipa = self.rules.apply(pinyin)
                ipa_tokens.append(ipa.replace(u',', u''))
            else:
                if normpunc:
                    token = self.normalize_punc(token)
                ipa_tokens.append(token)
            ipa_tokens = map(ligaturize, ipa_tokens)\
                if ligatures else ipa_tokens
        return u''.join(ipa_tokens)

    def strict_trans(self, text, normpunc=False, ligatures=False):
        """Transliterate `text` by delegating to `transliterate`.

        This method adds no behavior of its own: the three arguments are
        forwarded positionally, in the same order, to `self.transliterate`.

        Args:
            text: input handed through to `transliterate`
            normpunc (bool): forwarded unchanged as the second argument
            ligatures (bool): forwarded unchanged as the third argument

        Returns:
            Whatever `self.transliterate` returns for the same arguments.
        """
        forwarded = (text, normpunc, ligatures)
        return self.transliterate(*forwarded)


class EpihanTraditional(Epihan):
    """Variant of `Epihan` that loads CC-CEDict with ``traditional=True``."""

    def __init__(self, ligatures=False, cedict_file=None, rules_file='pinyin-to-ipa.txt'):
        """Construct epitran object for Traditional Chinese

        The parent initializer is not invoked; the `cedict`, `rules` and
        `regexp` attributes are assigned directly here.

        Args:
            ligatures (bool): if True, use ligatures instead of standard IPA
                              (accepted for interface compatibility; this
                              constructor itself does not read it)
            cedict_file (str): path to CC-CEDict dictionary file; required
            rules_file (str): name of file with rules for converting pinyin to
                              IPA, looked up under ``data/rules`` inside the
                              package

        Raises:
            MissingData: if `cedict_file` is empty or None
        """
        # Fail fast: nothing below is usable without the dictionary file.
        if not cedict_file:
            raise MissingData('Please specify a location for the CC-CEDict file.')

        # Resolve the packaged rules file to a real filesystem path.
        relative_rules = os.path.join('data', 'rules', rules_file)
        rules_path = pkg_resources.resource_filename(__name__, relative_rules)

        self.cedict = cedict.CEDictTrie(cedict_file, traditional=True)
        self.rules = rules.Rules([rules_path])
        # NOTE(review): ``\p{Han}`` is a Unicode-property escape; presumably
        # ``re`` is bound to an engine that supports it -- confirm against the
        # file's imports.
        self.regexp = re.compile(r'\p{Han}')