How to use the jaconv.kata2hira function in jaconv

To help you get started, we’ve selected a few jaconv examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github MikimotoH / furigana / furigana / furigana.py View on Github external
"""
    # Excerpt of a function body: tokenises `text` with MeCab and collects,
    # per token, either the pairs yielded by split_okurigana() or a 1-tuple
    # holding the unchanged surface string.
    mecab = MeCab.Tagger("-Ochasen")
    mecab.parse('') # An empty string has to be parsed once first
    node = mecab.parseToNode(text)
    ret = []

    # Follow the node chain via `.next` until it runs out.
    while node is not None:
        origin = node.surface # The original word (surface form)
        if not origin:
            # Nodes with an empty surface contribute nothing; move on.
            node = node.next
            continue

        # When origin is empty or contains no kanji, no furigana is needed, so it is output unchanged
        # NOTE(review): `origin != ""` here and `if origin:` below are always
        # true at this point because empty surfaces were skipped above.
        if origin != "" and any(is_kanji(_) for _ in origin):
            kana = node.feature.split(",")[7] # The reading: 8th comma-separated field of the feature string
            # Convert the katakana reading to hiragana.
            hiragana = jaconv.kata2hira(kana)
            for pair in split_okurigana(origin, hiragana):
                ret += [pair]
        else:
            if origin:
                ret += [(origin,)]
        node = node.next
    return ret
github hirofumi0810 / asr_preprocessing / csj / labels / target.py View on Github external
# Excerpt: writes the kanji label map, then the kana label map, one
# "<label>  <index>" line per entry.
with open(kanji_map_file_path, 'w') as f:
            # Reserve index
            # These characters are removed from the set and re-added at fixed
            # leading positions of the list built below.
            all_char_set.discard('N')
            all_char_set.discard('Z')
            all_char_set.discard('_')
            if model == 'attention':
                all_char_set.discard('<')
                all_char_set.discard('>')

            # Keep every character that is neither hiragana nor katakana ...
            kanji_set = set([])
            for char in all_char_set:
                if (not is_hiragana(char)) and (not is_katakana(char)):
                    kanji_set.add(char)
            # ... then add each entry of kana_list plus its hiragana conversion.
            for kana in kana_list:
                kanji_set.add(kana)
                kanji_set.add(jaconv.kata2hira(kana))

            # Reserved labels come first so they get the lowest indices; the
            # remaining labels follow in sorted order.
            # NOTE(review): 'N' and 'Z' are discarded individually above but a
            # single 'NZ' entry is listed here - confirm this is intended.
            # NOTE(review): no branch handles other `model` values; kanji_list
            # would then be unbound unless it is defined outside this excerpt.
            if model == 'ctc':
                kanji_list = ['_', 'NZ'] + sorted(list(kanji_set))
            elif model == 'attention':
                kanji_list = ['_', '<', '>', 'NZ'] + sorted(list(kanji_set))
            for i, kanji in enumerate(kanji_list):
                f.write('%s  %s\n' % (kanji, str(i)))

        # kana
        with open(kana_map_file_path, 'w') as f:
            # kana_list is rebound with the reserved labels prepended; its
            # existing order is kept (no sorting here).
            if model == 'ctc':
                kana_list = ['_', 'NZ'] + kana_list
            elif model == 'attention':
                kana_list = ['_', '<', '>', 'NZ'] + kana_list
            for i, kana in enumerate(kana_list):
                f.write('%s  %s\n' % (kana, str(i)))
github hirofumi0810 / asr_preprocessing / csj / labels / attention / character.py View on Github external
# kanji
# Excerpt: writes three label maps (kanji, kana, phone), each as
# "<label>  <index>" lines with the reserved labels placed first.
        with open(kanji_map_file_path, 'w') as f:
            # Labels whose indices are reserved
            all_char_set.discard('N')
            all_char_set.discard('Z')
            all_char_set.discard('_')
            all_char_set.discard('<')
            all_char_set.discard('>')

            # Keep every character that is neither hiragana nor katakana ...
            kanji_set = set([])
            for char in all_char_set:
                if (not is_hiragana(char)) and (not is_katakana(char)):
                    kanji_set.add(char)
            # ... then add each entry of kana_list plus its hiragana conversion.
            for kana in kana_list:
                kanji_set.add(kana)
                kanji_set.add(jaconv.kata2hira(kana))
            # NOTE: give frequently occurring labels the smallest possible indices
            # NOTE(review): 'N' and 'Z' are discarded individually above but a
            # single 'NZ' entry is listed here - confirm this is intended.
            kanji_list = ['_', '<', '>', 'NZ'] + sorted(list(kanji_set))
            for index, kanji in enumerate(kanji_list):
                f.write('%s  %s\n' % (kanji, str(index)))

        # kana
        with open(kana_map_file_path, 'w') as f:
            # kana_list is rebound with the reserved labels prepended; its
            # existing order is kept (no sorting here).
            kana_list = ['_', '<', '>', 'NZ'] + kana_list
            for index, kana in enumerate(kana_list):
                f.write('%s  %s\n' % (kana, str(index)))

        # phone
        with open(phone_map_file_path, 'w') as f:
            # Reserved labels first, then the phone set in sorted order.
            phone_list = ['_',  '<', '>', 'NZ'] + sorted(list(phone_set))
            for index, phone in enumerate(phone_list):
                f.write('%s  %s\n' % (phone, str(index)))
github patarapolw / WaniKaniTools / example / multiple_readings.py View on Github external
import jaconv
from WaniKaniTools.api import APIv2


# Script entry point: prints one tab-separated line (characters, readings,
# primary meanings) for each vocabulary subject whose set of distinct
# readings does not have exactly one element.
if __name__ == '__main__':
    api_v2 = APIv2()
    # Request the 'subjects' endpoint, restricted to vocabulary entries.
    result = api_v2.GET('subjects', params={'types':'vocabulary'})

    # print(json.dumps(result, indent=4))
    # NOTE(review): no `break` or reassignment of `result` is visible in this
    # excerpt, so as shown the loop re-processes the same data forever -
    # presumably the rest of the file advances to the next page; confirm.
    while True:
        for data in result['data']:
            # Collect distinct readings after converting katakana to hiragana,
            # so that readings differing only in script collapse into one.
            reading_array = set()
            for reading in data["data"]["readings"]:
                # if reading['primary']:
                    reading_array.add(jaconv.kata2hira(reading['reading']))

            # Skip subjects that have exactly one distinct reading.
            if len(reading_array) == 1:
                continue

            # Keep only the meanings flagged as primary.
            meaning_array = []
            for meaning in data["data"]["meanings"]:
                if meaning['primary']:
                    meaning_array += [meaning['meaning']]

            # Output row; falls back to the "character" key when "characters"
            # is absent. Readings come from a set, so their order is arbitrary.
            to_print = (
                data["data"]["characters"] if "characters" in data["data"] else data["data"]["character"],
                ', '.join(reading_array),
                ', '.join(meaning_array)
            )
            print('\t'.join(to_print))