How to use the epitran.Epitran class in epitran

To help you get started, we’ve selected a few epitran examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dmort27 / epitran / epitran / bin / epitranscribe.py View on Github external
def main(code):
    """Transliterate standard input line by line and write it to stdout.

    Each input line is decoded from UTF-8, lower-cased, normalized to
    Unicode NFD, passed through ``Epitran.transliterate``, re-encoded as
    UTF-8 and written to standard output.

    Args:
        code: language/script code handed to ``epitran.Epitran``.
    """
    transliterator = epitran.Epitran(code)
    for raw in sys.stdin:
        text = raw.decode('utf-8')
        normalized = unicodedata.normalize('NFD', text.lower())
        ipa = transliterator.transliterate(normalized)
        sys.stdout.write(ipa.encode('utf-8'))
github neulab / cmu-ner / utils / segnerfts / orm_morph.py View on Github external
# dictionary.
#
######################################
setSWords = []
# setSList = open("/home/data/LoReHLT17/internal/Lexicons/orm_lexicon/setS_wordlist.txt", "r")
# Load the word list, one entry per line with surrounding whitespace removed.
# The context manager closes the file handle as soon as the list is read
# instead of leaving it open for the lifetime of the module.
with open("../utils/segnerfts/res/setS_wordlist.txt", "r") as setSList:
    for line in setSList:
        setSWords.append(line.strip())

def get_freq_dist():
    """Return a FreqDist counting the Brown corpus words plus ``setSWords``."""
    counts = FreqDist()
    for words in (brown.words(), setSWords):
        counts.update(words)
    return counts
# Module-level Epitran instance created for the "orm-Latn" code.
epi = epitran.Epitran("orm-Latn")
# Shorthand: g2p(text) is the bound method epi.transliterate(text).
g2p = epi.transliterate

def stripFinalVowel(string):
    """Strip a single trailing vowel, or doubled vowel, from ``string``.

    A final doubled vowel ("aa", "ee", "ii", "oo", "uu") is removed as a
    unit; otherwise a single final "a", "e", "i", "o" or "u" is removed.
    Matching is case-sensitive. Strings that do not end in one of these
    vowels, including the empty string, are returned unchanged.

    Args:
        string: the word to trim.

    Returns:
        ``string`` without its final (possibly doubled) vowel.
    """
    # str.endswith is safe on the empty string, unlike indexing string[-1].
    if string.endswith(("aa", "ee", "ii", "oo", "uu")):
        return string[:-2]
    if string.endswith(("a", "e", "i", "o", "u")):
        return string[:-1]
    return string

def get_dictionary(dict_filenames):
    l1_to_l2 = defaultdict(list)
    
    for dict_filename in dict_filenames:
	if os.path.isfile(dict_filename):
    		with open(dict_filename, "r", encoding="utf-8") as fin:
github dmort27 / epitran / epitran / reromanize.py View on Github external
def __init__(self, code, table, decompose=True, cedict_file=None):
        """Construct object for re-romanizing Epitran output.

        This class converts orthographic input, via Epitran, to a more
        conventional romanization that should be more readable to most humans.

        Sets ``self.epi`` to the Epitran instance and ``self.mapping`` to the
        result of ``self._load_reromanizer(table, decompose)``.

        Args:
            code (str): ISO 639-3 code and ISO 15924 code joined with a hyphen
            table (str): Name of re-romanization table
            decompose (bool): apply decomposing normalization
            cedict_file: forwarded unchanged to ``epitran.Epitran`` as its
                         ``cedict_file`` keyword argument (default ``None``)
        """
        self.epi = epitran.Epitran(code, cedict_file=cedict_file)
        self.mapping = self._load_reromanizer(table, decompose)
github dmort27 / epitran / epitran / space.py View on Github external
def __init__(self, code, space_names):
        """Construct a Space object

        Space objects take strings (corresponding to segments) and return
        integers, placing them in an integer space that can be translated into
        a one-hot vector.

        The resulting object has a dictionary-like interface that supports
        indexing and iteration over "keys".

        Sets ``self.epi`` to an ``Epitran`` instance for ``code`` and
        ``self.dict`` to the result of ``self._load_space(space_names)``.

        Args:
            code (str): ISO 639-3 code joined to ISO 15924 code with "-"
            space_names (list): list of space names consisting of ISO 639-3
                                codes joined to ISO 15924 codes with "-"
        """
        self.epi = Epitran(code)
        self.dict = self._load_space(space_names)
github dmort27 / epitran / epitran / bin / connl2ipaspace.py View on Github external
def main(code, op, infiles, output):
    """Accumulate a segment count over the input files and print it.

    Every file in ``infiles`` is scanned with ``add_file_op`` when ``op`` is
    truthy, otherwise with ``add_file_gen``; the results are merged into a
    single ``Counter`` that is finally handed to ``print_space``.

    Args:
        code: language/script code handed to ``epitran.Epitran``.
        op: selects ``add_file_op`` (truthy) or ``add_file_gen`` (falsy).
        infiles: iterable of input file names.
        output: first argument passed to ``print_space``.
    """
    transliterator = epitran.Epitran(code)
    feature_table = panphon.FeatureTable()
    segment_counts = Counter()
    for path in infiles:
        message = u'Scanning:\t{}'.format(path)
        logging.debug(message.encode('utf-8'))
        if op:
            scan = add_file_op
        else:
            scan = add_file_gen
        segment_counts.update(scan(transliterator, feature_table, path))
    print_space(output, segment_counts)
github dmort27 / epitran / epitran / vector.py View on Github external
def __init__(self, code, space_names):
        """Constructs VectorWithIPASpace object

        A VectorWithIPASpace object takes orthographic words, via the
        word_to_segs method, and returns a list of tuples consisting of category
        (letter or punctuation), letter case, orthographic form, phonetic form,
        id within an IPA space, and articulatory feature vector.

        Sets ``self.epi`` to an ``Epitran`` instance for ``code`` and
        ``self.space`` to ``Space(code, space_names)``.

        Args:
            code (str): ISO 639-3 code joined to ISO 15924 code with "-"
            space_names (list): list of space names consisting of ISO 639-3
                                codes joined to ISO 15924 codes with "-"
        """
        self.epi = Epitran(code)
        self.space = Space(code, space_names)