How to use the pypinyin.TONE2 style constant in pypinyin

To help you get started, we’ve selected a few pypinyin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_pinyin_initials():
    """Check each output style on a word that has both initials and finals."""
    hans = '中心'
    # Each case is (positional style arguments, expected result).  Every case
    # is asserted twice: with the default ``strict`` and with strict=False,
    # and both calls are expected to give the same result for this word.
    cases = (
        # Default style: with tone marks.
        ((), [['zh\u014dng'], ['x\u012bn']]),
        # Normal style: no tones.
        ((NORMAL,), [['zhong'], ['xin']]),
        # Tone style: the tone mark sits on the first letter of the final.
        ((TONE,), [['zh\u014dng'], ['x\u012bn']]),
        # Tone style 2: the tone is written as a digit [1-4] inside the
        # syllable, e.g. 'zho1ng'.
        ((TONE2,), [['zho1ng'], ['xi1n']]),
        # Tone style 3: the tone digit [1-4] follows each whole syllable.
        ((TONE3,), [['zhong1'], ['xin1']]),
        # Initials style: only the initial part of each syllable.
        ((INITIALS,), [['zh'], ['x']]),
        # First-letter style: only the first letter of each syllable.
        ((FIRST_LETTER,), [['z'], ['x']]),
        # Bopomofo (Zhuyin) style, with tones.
        ((BOPOMOFO,), [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]),
        # Bopomofo (Zhuyin) style, first symbol only.
        ((BOPOMOFO_FIRST,), [['ㄓ'], ['ㄒ']]),
    )
    for style_args, expected in cases:
        assert pinyin(hans, *style_args) == expected
        assert pinyin(hans, *style_args, strict=False) == expected
github mozillazg / python-pinyin / tests / test_standard.py View on Github external
['因', dict(style=FINALS, strict=False), ['in']],
    ['央', dict(style=NORMAL), ['yang']],
    ['央', dict(style=FINALS), ['iang']],
    ['央', dict(style=FINALS, strict=False), ['ang']],
    ['英', dict(style=NORMAL), ['ying']],
    ['英', dict(style=FINALS), ['ing']],
    ['英', dict(style=FINALS, strict=False), ['ing']],
    ['雍', dict(style=NORMAL), ['yong']],
    ['雍', dict(style=FINALS), ['iong']],
    ['雍', dict(style=FINALS, strict=False), ['ong']],

    ['宜', dict(style=NORMAL), ['yi']],
    ['宜', dict(style=NORMAL, strict=False), ['yi']],
    ['宜', dict(style=TONE), ['yí']],
    ['宜', dict(style=TONE, strict=False), ['yí']],
    ['宜', dict(style=TONE2), ['yi2']],
    ['宜', dict(style=TONE2, strict=False), ['yi2']],
    ['宜', dict(style=TONE3), ['yi2']],
    ['宜', dict(style=TONE3, strict=False), ['yi2']],
    ['宜', dict(style=INITIALS), ['']],
    ['宜', dict(style=INITIALS, strict=False), ['y']],
    ['宜', dict(style=FIRST_LETTER), ['y']],
    ['宜', dict(style=FIRST_LETTER, strict=False), ['y']],
    ['宜', dict(style=FINALS), ['i']],
    ['宜', dict(style=FINALS, strict=False), ['i']],
    ['宜', dict(style=FINALS_TONE), ['í']],
    ['宜', dict(style=FINALS_TONE, strict=False), ['í']],
    ['宜', dict(style=FINALS_TONE2), ['i2']],
    ['宜', dict(style=FINALS_TONE2, strict=False), ['i2']],
    ['宜', dict(style=FINALS_TONE3), ['i2']],
    ['宜', dict(style=FINALS_TONE3, strict=False), ['i2']],
github mozillazg / python-pinyin / tests / test_standard.py View on Github external
# ü行的韵母,前面没有声母的时候,写成yu(迂),yue(约),yuan(冤),
    ['迂', dict(style=NORMAL), ['yu']],
    ['迂', dict(style=FINALS), ['v']],
    ['迂', dict(style=FINALS, strict=False), ['u']],
    ['约', dict(style=NORMAL), ['yue']],
    ['约', dict(style=FINALS), ['ve']],
    ['约', dict(style=FINALS, strict=False), ['ue']],
    ['冤', dict(style=NORMAL), ['yuan']],
    ['冤', dict(style=FINALS), ['van']],
    ['冤', dict(style=FINALS, strict=False), ['uan']],

    ['鱼', dict(style=NORMAL), ['yu']],
    ['鱼', dict(style=NORMAL, strict=False), ['yu']],
    ['鱼', dict(style=TONE), ['yú']],
    ['鱼', dict(style=TONE, strict=False), ['yú']],
    ['鱼', dict(style=TONE2), ['yu2']],
    ['鱼', dict(style=TONE2, strict=False), ['yu2']],
    ['鱼', dict(style=TONE3), ['yu2']],
    ['鱼', dict(style=TONE3, strict=False), ['yu2']],
    ['鱼', dict(style=INITIALS), ['']],
    ['鱼', dict(style=INITIALS, strict=False), ['y']],
    ['鱼', dict(style=FIRST_LETTER), ['y']],
    ['鱼', dict(style=FIRST_LETTER, strict=False), ['y']],
    ['鱼', dict(style=FINALS), ['v']],
    ['鱼', dict(style=FINALS, strict=False), ['u']],
    ['鱼', dict(style=FINALS_TONE), ['ǘ']],
    ['鱼', dict(style=FINALS_TONE, strict=False), ['ú']],
    ['鱼', dict(style=FINALS_TONE2), ['v2']],
    ['鱼', dict(style=FINALS_TONE2, strict=False), ['u2']],
    ['鱼', dict(style=FINALS_TONE3), ['v2']],
    ['鱼', dict(style=FINALS_TONE3, strict=False), ['u2']],
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_errors():
    hans = (
        ('啊', {'style': TONE2}, [['a']]),
        ('啊a', {'style': TONE2}, [['a'], ['a']]),
        # 非中文字符,没有拼音
        ('⺁', {'style': TONE2}, [['\u2e81']]),
        ('⺁', {'style': TONE2, 'errors': 'ignore'}, []),
        ('⺁', {'style': TONE2, 'errors': 'replace'}, [['2e81']]),
        ('⺁⺁', {'style': TONE2, 'errors': 'replace'}, [['2e812e81']]),
        ('⺁⺁', {'style': TONE2, 'errors': lambda x: ['a' for _ in x]},
         [['a'], ['a']]),
        ('⺁⺁', {'style': TONE2, 'errors': lambda x: [['a', 'b'], ['b', 'c']]},
         [['a'], ['b']]),
        ('⺁⺁', {'style': TONE2, 'heteronym': True,
                'errors': lambda x: [['a', 'b'], ['b', 'c']]},
         [['a', 'b'], ['b', 'c']]),
        # 中文字符,没有拼音
        ('鿅', {'style': TONE2}, [['\u9fc5']]),
        ('鿅', {'style': TONE2, 'errors': 'ignore'}, []),
        ('鿅', {'style': TONE2, 'errors': '233'}, []),
        ('鿅', {'style': TONE2, 'errors': 'replace'}, [['9fc5']]),
        ('鿅', {'style': TONE2, 'errors': lambda x: ['a']}, [['a']]),
        ('鿅', {'style': TONE2, 'errors': lambda x: None}, []),
        ('鿅鿅', {'style': TONE2, 'errors': lambda x: ['a' for _ in x]},
         [['a'], ['a']]),
github Lab41 / sunny-side-up / src / datasets / open_weiboscope.py View on Github external
def hanzi_to_pinyin(txt):
    """
    Romanize the Chinese characters in txt as alphanumeric pinyin.

    Args:
        txt -- text with Chinese characters in it (unicode)
    Returns:
        unicode string built by joining, with no separator, the items
        that pyp.lazy_pinyin returns for txt in the TONE2 style
    """
    return u''.join(pyp.lazy_pinyin(txt, style=pyp.TONE2))
github Zeco-01 / NLPCC-2016-WordSim / NLPCC_WORDSIM / PreProc / get_sims.py View on Github external
def get_pinyin_sim(word1, word2):
    """Return a pinyin-based similarity score for two words.

    The words are compared position by position, up to the length of the
    shorter one.  A position counts as a match when the two characters
    share at least one reading in TONE2 style (heteronym readings are
    included).  The score is ``2 * matches / (len(word1) + len(word2))``.

    :param word1: first word (indexable sequence of characters)
    :param word2: second word (indexable sequence of characters)
    :return: float similarity score; 0.0 when both words are empty
    """
    total_len = len(word1) + len(word2)
    if total_len == 0:
        # Two empty words have nothing to compare; returning early avoids
        # the ZeroDivisionError the final division would otherwise raise.
        return 0.0

    count = 0
    for char1, char2 in zip(word1, word2):
        readings1 = pinyin(char1, style=pypinyin.TONE2, heteronym=True)[0]
        # Looked up once per position rather than once per reading of char1.
        readings2 = pinyin(char2, style=pypinyin.TONE2, heteronym=True)[0]
        # A position contributes at most one match, however many readings
        # the two characters have in common.
        if any(reading in readings2 for reading in readings1):
            count += 1
    return 2.0 * count / total_len
github shibing624 / pycorrector / pycorrector / detect.py View on Github external
def get_homophones_by_pinyin(input_pinyin):
    """
    Collect the characters whose pinyin equals the given pinyin.
    :param input_pinyin: pinyin to match, in TONE2 form (e.g. 'zho1ng' for 中)
    :return: list of matching characters, in code point order
    """
    # The CJK Unified Ideographs range is 0x4E00-0x9FA5, i.e. the commonly
    # cited 20902 Chinese characters; the upper bound of range() is exclusive.
    candidates = (chr(code_point) for code_point in range(0x4e00, 0x9fa6))
    # Only the first reading returned for each character is compared.
    return [
        char for char in candidates
        if pinyin([char], style=pypinyin.TONE2)[0][0] == input_pinyin
    ]
github awesome-archive / tacotron_cn / chinese2pinyin.py View on Github external
def ch2p(speech):
    """Convert text into a space-separated pinyin string.

    The text is converted with ``lazy_pinyin`` in TONE2 style, passed
    through ``text2pinyin``, joined with single spaces, and then every key
    of ``alpha_pronuce`` found in the result is replaced by its value.

    :param speech: text to convert; must be a ``str`` (subclasses allowed)
    :return: the converted string, or ``None`` after printing
        "input format error" when ``speech`` is not a ``str``
    """
    # isinstance instead of an exact type comparison, so str subclasses are
    # accepted as well; every other input still takes the error path.
    if not isinstance(speech, str):
        print("input format error")
        return None

    syllables = lazy_pinyin(speech, style=pypinyin.TONE2)
    syllables = text2pinyin(syllables)
    text = ' '.join(syllables)
    for alpha, pronuce in alpha_pronuce.items():
        text = text.replace(alpha, pronuce)
    # Two passes are kept deliberately: together they collapse runs of up
    # to four consecutive spaces into a single space.
    text = text.replace("  ", " ")
    text = text.replace("  ", " ")
    return text
github Zeco-01 / NLPCC-2016-WordSim / NLPCC_WORDSIM / PreProc / get_sims.py View on Github external
def get_pinyin_sim(word1, word2):
    """Return a pinyin-based similarity score for two words.

    The words are compared position by position, up to the length of the
    shorter one.  A position counts as a match when the two characters
    share at least one reading in TONE2 style (heteronym readings are
    included).  The score is ``2 * matches / (len(word1) + len(word2))``.

    :param word1: first word (indexable sequence of characters)
    :param word2: second word (indexable sequence of characters)
    :return: float similarity score; 0.0 when both words are empty
    """
    total_len = len(word1) + len(word2)
    if total_len == 0:
        # Two empty words have nothing to compare; returning early avoids
        # the ZeroDivisionError the final division would otherwise raise.
        return 0.0

    count = 0
    for char1, char2 in zip(word1, word2):
        readings1 = pinyin(char1, style=pypinyin.TONE2, heteronym=True)[0]
        # Looked up once per position rather than once per reading of char1.
        readings2 = pinyin(char2, style=pypinyin.TONE2, heteronym=True)[0]
        # A position contributes at most one match, however many readings
        # the two characters have in common.
        if any(reading in readings2 for reading in readings1):
            count += 1
    return 2.0 * count / total_len