How to use pypinyin - 10 common examples

To help you get started, we’ve selected a few pypinyin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozillazg / phrase-pinyin-data / parse_latest_cc_cedict.py View on Github external
# -*- coding: utf-8 -*-

import os
import io
import re
import codecs
from pypinyin.phonetic_symbol import phonetic_symbol
from pypinyin.pinyin_dict import pinyin_dict
from pypinyin.style.tone import ToneConverter

ROOT = os.path.dirname(os.path.realpath(__file__))


tone_converter = ToneConverter()
tone3_2_tone_dict = {}
for k, v in pinyin_dict.items():
    parts = v.split(',')
    for part in parts:
        part = part.strip()
        if part:
            tone3 = tone_converter.to_tone3(part).strip().lower()
            if tone3:
                tone3_2_tone_dict[tone3] = part


def tone3_to_tone1(tone3):
    tone3 = tone3.strip().lower()
    # 儿化
    if tone3 == 'r5':
        return 'er'
    # 轻声
    if '5' in tone3:
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_pinyin_finals():
    """只包含韵母的词语"""
    hans = '嗷嗷'
    assert pinyin(hans) == [['\xe1o'], ['\xe1o']]
    assert pinyin(hans + 'abc') == [['\xe1o'], ['\xe1o'], ['abc']]
    assert pinyin(hans, NORMAL) == [['ao'], ['ao']]
    assert pinyin(hans, TONE) == [['\xe1o'], ['\xe1o']]
    assert pinyin(hans, TONE2) == [['a2o'], ['a2o']]
    assert pinyin(hans, TONE3) == [['ao2'], ['ao2']]
    assert pinyin(hans, INITIALS) == [[''], ['']]
    assert pinyin(hans, FIRST_LETTER) == [['a'], ['a']]
    assert pinyin(hans, BOPOMOFO) == [['ㄠˊ'], ['ㄠˊ']]
    assert pinyin(hans, BOPOMOFO_FIRST) == [['ㄠ'], ['ㄠ']]
    assert pinyin(hans, CYRILLIC) == [['ао2'], ['ао2']]
    assert pinyin(hans, CYRILLIC_FIRST) == [['а'], ['а']]
    assert pinyin(hans, heteronym=True) == [['\xe1o'], ['\xe1o']]
    assert pinyin('啊', heteronym=True) == \
        [['a', 'è', 'ā', 'á', 'ǎ', 'à']]
    assert pinyin(hans, style=FINALS) == [['ao'], ['ao']]
    assert pinyin(hans, style=FINALS_TONE) == [['\xe1o'], ['\xe1o']]
    assert pinyin(hans, style=FINALS_TONE2) == [['a2o'], ['a2o']]
    assert pinyin(hans, style=FINALS_TONE3) == [['ao2'], ['ao2']]
github mozillazg / python-pinyin / tests / test_standard.py View on Github external
def test_uei(hans, kwargs, result):
    assert lazy_pinyin(hans, **kwargs) == result
    assert pinyin(hans, **kwargs) == [result]
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_pinyin_initials():
    """包含声明和韵母的词语"""
    hans = '中心'
    # 默认风格,带声调
    assert pinyin(hans) == [['zh\u014dng'], ['x\u012bn']]
    assert pinyin(hans, strict=False) == [['zh\u014dng'], ['x\u012bn']]
    # 普通风格,不带声调
    assert pinyin(hans, NORMAL) == [['zhong'], ['xin']]
    assert pinyin(hans, NORMAL, strict=False) == [['zhong'], ['xin']]
    # 声调风格,拼音声调在韵母第一个字母上
    assert pinyin(hans, TONE) == [['zh\u014dng'], ['x\u012bn']]
    assert pinyin(hans, TONE, strict=False) == [['zh\u014dng'], ['x\u012bn']]
    # 声调风格2,即拼音声调在各个声母之后,用数字 [1-4] 进行表示
    assert pinyin(hans, TONE2) == [['zho1ng'], ['xi1n']]
    assert pinyin(hans, TONE2, strict=False) == [['zho1ng'], ['xi1n']]
    # 声调风格3,即拼音声调在各个拼音之后,用数字 [1-4] 进行表示
    assert pinyin(hans, TONE3) == [['zhong1'], ['xin1']]
    assert pinyin(hans, TONE3, strict=False) == [['zhong1'], ['xin1']]
    # 声母风格,只返回各个拼音的声母部分
    assert pinyin(hans, INITIALS) == [['zh'], ['x']]
    assert pinyin(hans, INITIALS, strict=False) == [['zh'], ['x']]
    # 首字母风格,只返回拼音的首字母部分
    assert pinyin(hans, FIRST_LETTER) == [['z'], ['x']]
    assert pinyin(hans, FIRST_LETTER, strict=False) == [['z'], ['x']]
    # 注音风格,带声调
    assert pinyin(hans, BOPOMOFO) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    assert pinyin(hans, BOPOMOFO, strict=False) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    # 注音风格,首字母
    assert pinyin(hans, BOPOMOFO_FIRST) == [['ㄓ'], ['ㄒ']]
    assert pinyin(hans, BOPOMOFO_FIRST, strict=False) == [['ㄓ'], ['ㄒ']]
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
hans = '中心'
    # 默认风格,带声调
    assert pinyin(hans) == [['zh\u014dng'], ['x\u012bn']]
    assert pinyin(hans, strict=False) == [['zh\u014dng'], ['x\u012bn']]
    # 普通风格,不带声调
    assert pinyin(hans, NORMAL) == [['zhong'], ['xin']]
    assert pinyin(hans, NORMAL, strict=False) == [['zhong'], ['xin']]
    # 声调风格,拼音声调在韵母第一个字母上
    assert pinyin(hans, TONE) == [['zh\u014dng'], ['x\u012bn']]
    assert pinyin(hans, TONE, strict=False) == [['zh\u014dng'], ['x\u012bn']]
    # 声调风格2,即拼音声调在各个声母之后,用数字 [1-4] 进行表示
    assert pinyin(hans, TONE2) == [['zho1ng'], ['xi1n']]
    assert pinyin(hans, TONE2, strict=False) == [['zho1ng'], ['xi1n']]
    # 声调风格3,即拼音声调在各个拼音之后,用数字 [1-4] 进行表示
    assert pinyin(hans, TONE3) == [['zhong1'], ['xin1']]
    assert pinyin(hans, TONE3, strict=False) == [['zhong1'], ['xin1']]
    # 声母风格,只返回各个拼音的声母部分
    assert pinyin(hans, INITIALS) == [['zh'], ['x']]
    assert pinyin(hans, INITIALS, strict=False) == [['zh'], ['x']]
    # 首字母风格,只返回拼音的首字母部分
    assert pinyin(hans, FIRST_LETTER) == [['z'], ['x']]
    assert pinyin(hans, FIRST_LETTER, strict=False) == [['z'], ['x']]
    # 注音风格,带声调
    assert pinyin(hans, BOPOMOFO) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    assert pinyin(hans, BOPOMOFO, strict=False) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    # 注音风格,首字母
    assert pinyin(hans, BOPOMOFO_FIRST) == [['ㄓ'], ['ㄒ']]
    assert pinyin(hans, BOPOMOFO_FIRST, strict=False) == [['ㄓ'], ['ㄒ']]
    # test CYRILLIC style
    assert pinyin(hans, CYRILLIC) == [['чжун1'], ['синь1']]
    assert pinyin(hans, CYRILLIC, strict=False) == [['чжун1'], ['синь1']]
    # CYRILLIC_FIRST style return only first letters
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
assert pinyin(hans, TONE2, strict=False) == [['zho1ng'], ['xi1n']]
    # 声调风格3,即拼音声调在各个拼音之后,用数字 [1-4] 进行表示
    assert pinyin(hans, TONE3) == [['zhong1'], ['xin1']]
    assert pinyin(hans, TONE3, strict=False) == [['zhong1'], ['xin1']]
    # 声母风格,只返回各个拼音的声母部分
    assert pinyin(hans, INITIALS) == [['zh'], ['x']]
    assert pinyin(hans, INITIALS, strict=False) == [['zh'], ['x']]
    # 首字母风格,只返回拼音的首字母部分
    assert pinyin(hans, FIRST_LETTER) == [['z'], ['x']]
    assert pinyin(hans, FIRST_LETTER, strict=False) == [['z'], ['x']]
    # 注音风格,带声调
    assert pinyin(hans, BOPOMOFO) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    assert pinyin(hans, BOPOMOFO, strict=False) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    # 注音风格,首字母
    assert pinyin(hans, BOPOMOFO_FIRST) == [['ㄓ'], ['ㄒ']]
    assert pinyin(hans, BOPOMOFO_FIRST, strict=False) == [['ㄓ'], ['ㄒ']]
    # test CYRILLIC style
    assert pinyin(hans, CYRILLIC) == [['чжун1'], ['синь1']]
    assert pinyin(hans, CYRILLIC, strict=False) == [['чжун1'], ['синь1']]
    # CYRILLIC_FIRST style return only first letters
    assert pinyin(hans, CYRILLIC_FIRST) == [['ч'], ['с']]
    assert pinyin(hans, CYRILLIC_FIRST, strict=False) == [['ч'], ['с']]
    # 启用多音字模式
    assert pinyin(hans, heteronym=True) == [['zh\u014dng', 'zh\xf2ng'],
                                            ['x\u012bn']]
    assert pinyin(hans, heteronym=True, strict=False) == \
        [['zh\u014dng', 'zh\xf2ng'], ['x\u012bn']]
    # 韵母风格1,只返回各个拼音的韵母部分,不带声调
    assert pinyin(hans, style=FINALS) == [['ong'], ['in']]
    assert pinyin(hans, style=FINALS, strict=False) == [['ong'], ['in']]
    # 韵母风格2,带声调,声调在韵母第一个字母上
    assert pinyin(hans, style=FINALS_TONE) == [['\u014dng'], ['\u012bn']]
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_custom_pinyin_dict_tone2():
    load_single_dict({ord('桔'): 'ce4,si4'}, style='tone2')
    assert lazy_pinyin('桔', style=TONE2) == ['ce4']
    assert pinyin('桔') == [['cè']]
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_36():
    hans = '两年前七斤喝醉了酒'
    pys = ['liang', 'nian', 'qian', 'qi', 'jin', 'he', 'zui', 'le', 'jiu']
    assert lazy_pinyin(hans) == pys
github mozillazg / python-pinyin / tests / contrib / test_neutral_tone.py View on Github external
def test_neutral_tone_with_5_many_cases(input, expected_old, expected_new):
    assert lazy_pinyin(input, style=Style.TONE2) == expected_old
    assert my_pinyin.lazy_pinyin(input, style=Style.TONE2) == expected_new
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
    ['嘸', Style.TONE2, ['m1', 'm2']],
    ['誒', Style.TONE, ['ê̄', 'ế', 'ê̌', 'ề']],
    ['誒', Style.TONE2, ['ê1', 'ê2', 'ê3', 'ê4']],
])
def test_m_e(han, style, expect):
    result = pinyin(han, style=style, heteronym=True)
    assert len(result) == 1
    assert (set(result[0]) & set(expect)) == set(expect)