How to use the pypinyin.NORMAL function in pypinyin

To help you get started, we’ve selected a few pypinyin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_pinyin_initials():
    """Check each basic style on a word whose syllables have an initial and a final."""
    hans = '中心'
    # Each entry pairs the positional style arguments with the expected
    # output; every case is checked with the default ``strict`` value and
    # again with ``strict=False``.
    cases = [
        # Default style: tone marks included.
        ((), [['zh\u014dng'], ['x\u012bn']]),
        # NORMAL style: no tones.
        ((NORMAL,), [['zhong'], ['xin']]),
        # TONE style: tone mark written on the vowel of the final.
        ((TONE,), [['zh\u014dng'], ['x\u012bn']]),
        # TONE2 style: tone written as a digit [1-4] inside the syllable.
        ((TONE2,), [['zho1ng'], ['xi1n']]),
        # TONE3 style: tone written as a digit [1-4] after each syllable.
        ((TONE3,), [['zhong1'], ['xin1']]),
        # INITIALS style: only the initial of each syllable.
        ((INITIALS,), [['zh'], ['x']]),
        # FIRST_LETTER style: only the first letter of each syllable.
        ((FIRST_LETTER,), [['z'], ['x']]),
    ]
    for style_args, expected in cases:
        assert pinyin(hans, *style_args) == expected
        assert pinyin(hans, *style_args, strict=False) == expected
github mozillazg / python-pinyin / tests / test_standard.py View on Github external
['居', dict(style=FINALS), ['v']],
    ['居', dict(style=FINALS, strict=False), ['u']],
    ['区', dict(style=NORMAL), ['qu']],
    ['区', dict(style=FINALS), ['v']],
    ['区', dict(style=FINALS, strict=False), ['u']],
    ['虚', dict(style=NORMAL), ['xu']],
    ['虚', dict(style=FINALS), ['v']],
    ['虚', dict(style=FINALS, strict=False), ['u']],
    ['女', dict(style=NORMAL), ['nv']],
    ['女', dict(style=FINALS), ['v']],
    ['女', dict(style=FINALS, strict=False), ['v']],
    ['吕', dict(style=NORMAL), ['lv']],
    ['吕', dict(style=FINALS), ['v']],
    ['吕', dict(style=FINALS, strict=False), ['v']],

    ['具', dict(style=NORMAL), ['ju']],
    ['具', dict(style=NORMAL, strict=False), ['ju']],
    ['具', dict(style=TONE), ['jù']],
    ['具', dict(style=TONE, strict=False), ['jù']],
    ['具', dict(style=TONE2), ['ju4']],
    ['具', dict(style=TONE2, strict=False), ['ju4']],
    ['具', dict(style=TONE3), ['ju4']],
    ['具', dict(style=TONE3, strict=False), ['ju4']],
    ['具', dict(style=INITIALS), ['j']],
    ['具', dict(style=INITIALS, strict=False), ['j']],
    ['具', dict(style=FIRST_LETTER), ['j']],
    ['具', dict(style=FIRST_LETTER, strict=False), ['j']],
    ['具', dict(style=FINALS), ['v']],
    ['具', dict(style=FINALS, strict=False), ['u']],
    ['具', dict(style=FINALS_TONE), ['ǜ']],
    ['具', dict(style=FINALS_TONE, strict=False), ['ù']],
    ['具', dict(style=FINALS_TONE2), ['v4']],
github mozillazg / python-pinyin / tests / test_standard.py View on Github external
['儿', dict(style=FINALS), ['er']],
]


@pytest.mark.parametrize('hans, kwargs, result', data_for_finals)
def test_finals(hans, kwargs, result):
    """Check both conversion entry points against one parametrized case.

    ``lazy_pinyin`` must return ``result`` itself, while ``pinyin`` must
    return ``result`` wrapped in an outer list.
    """
    flat_output = lazy_pinyin(hans, **kwargs)
    assert flat_output == result
    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]


# 零声母
data_for_zero_consonant = [
    # i行的韵母,前面没有声母的时候,写成yi(衣),ya(呀),ye(耶),yao(腰),
    # you(忧),yan(烟),yin(因),yang(央),ying(英),yong(雍)。

    ['衣', dict(style=NORMAL), ['yi']],
    ['衣', dict(style=FINALS), ['i']],
    ['衣', dict(style=FINALS, strict=False), ['i']],
    ['呀', dict(style=NORMAL), ['ya']],
    ['呀', dict(style=FINALS), ['ia']],
    ['呀', dict(style=FINALS, strict=False), ['a']],
    ['耶', dict(style=NORMAL), ['ye']],
    ['耶', dict(style=FINALS), ['ie']],
    ['耶', dict(style=FINALS, strict=False), ['e']],
    ['腰', dict(style=NORMAL), ['yao']],
    ['腰', dict(style=FINALS), ['iao']],
    ['腰', dict(style=FINALS, strict=False), ['ao']],
    ['忧', dict(style=NORMAL), ['you']],
    ['忧', dict(style=FINALS), ['iou']],
    ['忧', dict(style=FINALS, strict=False), ['ou']],
    ['烟', dict(style=NORMAL), ['yan']],
    ['烟', dict(style=FINALS), ['ian']],
github LiuRoy / Pinyin_Demo / pinyin / hmm / train.py View on Github external
def init_emission():
    """
    Initialise the emission probabilities.

    For every ``(phrase, frequency)`` pair yielded by ``iter_dict()``, the
    phrase is converted with ``pinyin(phrase, style=NORMAL)`` and the
    frequency is split evenly between the readings returned for each
    character, accumulating a per-character ``{reading: weight}`` map.
    Each character's weights are then normalised by their sum and the log
    of every ratio is stored through ``Emission.add``.
    """
    character_pinyin_map = {}
    for phrase, frequency in iter_dict():
        pinyins = pinyin(phrase, style=NORMAL)
        # NOTE(review): zip() pairs characters with readings positionally;
        # this assumes pinyin() yields one entry per character - confirm.
        for character, py in zip(phrase, pinyins):
            character_pinyin_count = len(py)
            # One code path for both first-seen and already-seen characters.
            pinyin_freq_map = character_pinyin_map.setdefault(character, {})
            for x in py:
                # NOTE(review): on Python 2 without
                # ``from __future__ import division`` this truncates when
                # ``frequency`` is an int - confirm against the file header.
                pinyin_freq_map[x] = pinyin_freq_map.get(x, 0) + \
                    frequency/character_pinyin_count

    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` was removed
    # in Python 3 and raised AttributeError there.
    for character, pinyin_map in character_pinyin_map.items():
        sum_frequency = sum(pinyin_map.values())
        for py, frequency in pinyin_map.items():
            Emission.add(character, py, log(frequency/sum_frequency))
github shibing624 / pycorrector / pycorrector / detect.py View on Github external
def get_homophones_by_char(input_char):
    """
    Return the Chinese characters that share a reading with ``input_char``.

    :param input_char: text passed to ``pinyin``; only the first reading of
        its first item is used for the comparison
    :return: list of single-character strings in U+4E00..U+9FA5 whose first
        NORMAL-style reading equals that of ``input_char``
    """
    # The target reading does not depend on the loop variable, so look it up
    # once instead of once per candidate character.
    target = pinyin(input_char, style=pypinyin.NORMAL)[0][0]
    # 0x4E00-0x9FA5 is the CJK Unified Ideographs range, i.e. the commonly
    # cited 20902 Chinese characters.
    return [
        chr(i)
        for i in range(0x4e00, 0x9fa6)
        if pinyin([chr(i)], style=pypinyin.NORMAL)[0][0] == target
    ]
github Whoisurdady / WeChatRoomGame / game_chengyu.py View on Github external
def check_pinyin_same(input, match_word):
    """Compare the first reading of ``input`` with one from ``match_word``.

    Returns True when the first NORMAL-style reading of the first item of
    ``input`` equals the first reading of the item at index 3 of
    ``match_word``.
    """
    head_reading = pypinyin.pinyin(input, style=pypinyin.NORMAL)[0][0]
    # NOTE(review): index 3 presumably targets the last character of a
    # four-character idiom - confirm; shorter words raise IndexError.
    converted_match = pypinyin.pinyin(match_word, style=pypinyin.NORMAL)
    return head_reading == converted_match[3][0]
github Whoisurdady / WeChatRoomGame / game_chengyu.py View on Github external
def check_pinyin_same(input, match_word):
    """Compare the first reading of ``input`` with one from ``match_word``.

    Returns True when the first NORMAL-style reading of the first item of
    ``input`` equals the first reading of the item at index 3 of
    ``match_word``.
    """
    head_reading = pypinyin.pinyin(input, style=pypinyin.NORMAL)[0][0]
    # NOTE(review): index 3 presumably targets the last character of a
    # four-character idiom - confirm; shorter words raise IndexError.
    converted_match = pypinyin.pinyin(match_word, style=pypinyin.NORMAL)
    return head_reading == converted_match[3][0]
github shibing624 / pycorrector / pycorrector / utils / text_utils.py View on Github external
def get_homophones_by_char(input_char):
    """
    Return the Chinese characters that share a reading with ``input_char``.

    :param input_char: text passed to ``pinyin``; only the first reading of
        its first item is used for the comparison
    :return: list of single-character strings in U+4E00..U+9FA5 whose first
        NORMAL-style reading equals that of ``input_char``
    """
    # The target reading does not depend on the loop variable, so look it up
    # once instead of once per candidate character.
    target = pinyin(input_char, style=pypinyin.NORMAL)[0][0]
    # 0x4E00-0x9FA5 is the CJK Unified Ideographs range, i.e. the commonly
    # cited 20902 Chinese characters.
    return [
        chr(i)
        for i in range(0x4e00, 0x9fa6)
        if pinyin([chr(i)], style=pypinyin.NORMAL)[0][0] == target
    ]
github junzew / HanTTS / script.py View on Github external
# Convert every item of ``chars`` to its NORMAL-style readings, keeping all
# heteronym candidates.
l = [
    pinyin(x, heteronym=True, strict=True, style=pypinyin.NORMAL)
    for x in chars
]
# flatten list