How to use the pypinyin.lazy_pinyin function in pypinyin

To help you get started, we’ve selected a few pypinyin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_custom_pinyin_dict_tone2():
    """Check that a custom single-character entry loaded in 'tone2' style is used.

    Registers the readings ``'ce4,si4'`` for the character 桔 through
    ``load_single_dict(..., style='tone2')`` and then verifies that the first
    reading is returned both in TONE2 style and in the default ``pinyin`` style.
    """
    custom_readings = {ord('桔'): 'ce4,si4'}
    load_single_dict(custom_readings, style='tone2')

    # Only the first of the two registered readings is expected back.
    tone2_result = lazy_pinyin('桔', style=TONE2)
    assert tone2_result == ['ce4']

    default_result = pinyin('桔')
    assert default_result == [['cè']]
github mozillazg / python-pinyin / tests / test_pinyin.py View on Github external
def test_36():
    """Check the default lazy_pinyin output for a nine-character sentence."""
    sentence = '两年前七斤喝醉了酒'
    expected = [
        'liang', 'nian', 'qian',
        'qi', 'jin',
        'he', 'zui', 'le', 'jiu',
    ]
    actual = lazy_pinyin(sentence)
    assert actual == expected
github chenghuige / wenzheng / utils / gezi / segment.py View on Github external
words = segment_utf8_pinyin2(text)
    elif method == 'basic2':
      words = kf.deform_dis_get_words(text)
    elif method =='char_pinyin':
      from pypinyin import lazy_pinyin as pinyin
      #return pinyin(text)
      return [x.strip() for x in pinyin(text)]
    elif method =='char_pinyin2':
      from pypinyin import lazy_pinyin as pinyin
      return [''.join(pinyin(x)) for x in text]
    elif method == 'char_then_pinyin':
      # In [2]: pinyin('补肾微信xtx0329')
      # Out[2]: ['bu', 'shen', 'wei', 'xin', 'xtx0329'] 
      # so as 补,肾,微,信,x,t,x,0,3,2,9,<s>,bu,shen,wei,xin,
      from pypinyin import lazy_pinyin as pinyin
      return  segment_char(text) + ['<s>'] + [x.strip() for x in pinyin(text)]
    elif method == 'char_then_pinyin2':
      from pypinyin import lazy_pinyin as pinyin
      return  segment_char(text) + ['<s>'] + [''.join(pinyin(x)).strip() for x in text if x.strip()]
    elif method == 'word_char':
      return [x for x in cut(text, cut_all=False)] + ['<s>'] + segment_char(text)
    elif method == 'word_char_pinyin':
      from pypinyin import lazy_pinyin as pinyin
      return [x for x in cut(text, cut_all=False)] + ['<s>'] + segment_char(text) + ['<s>'] + [x.strip() for x in pinyin(text)]
    elif method == 'word_char_pinyin2':
      from pypinyin import lazy_pinyin as pinyin
      return [x for x in cut(text, cut_all=False)] + ['<s>'] + segment_char(text) + ['<s>'] + [''.join(pinyin(x)).strip() for x in text if x.strip()]
    elif method == 'tab':
      words = text.strip().split('\t')
    elif method == 'white_space':
      words = text.strip().split()
    else:
github shibing624 / parrots / parrots / tts.py View on Github external
def synthesize(self, input_text='', output_wav_path=''):
        """
        Synthesize .wav from text
        input_text: the folder that contains all syllables .wav files
        output_wav_path: the destination folder to save the synthesized file
        """
        delay = 0
        increment = 355  # milliseconds
        pause = 500  # pause for punctuation
        syllables = lazy_pinyin(input_text, style=pypinyin.TONE3)

        # initialize to be complete silence, each character takes up ~500ms
        result = AudioSegment.silent(duration=500 * len(input_text))
        for syllable in syllables:
            path = os.path.join(self.syllables_dir, syllable + ".wav")
            sound_file = Path(path)
            # insert 500 sr silence for punctuation marks
            if syllable in self.punctuation:
                short_silence = AudioSegment.silent(duration=pause)
                result = result.overlay(short_silence, position=delay)
                delay += increment
                continue
            # skip sound file that doesn't exist
            if not sound_file.is_file():
                continue
            segment = AudioSegment.from_wav(path)
github binderclip / code-snippets-python / packages / pypinyin_sp / pypinyin_sp.py View on Github external
def main():
    """Print the lazy_pinyin conversion of a few sample strings.

    The second sample is printed as a single joined string; the others are
    printed as the lists returned by ``lazy_pinyin``.
    """
    print(lazy_pinyin('中国'))

    watermelon_syllables = lazy_pinyin('大西瓜')
    print(''.join(watermelon_syllables))

    # Samples mixing Latin letters/digits with Chinese characters.
    for sample in ('T恤', 'big西瓜373a'):
        print(lazy_pinyin(sample))
github gswyhq / hello-world / pinyin / 生成成语的拼音.py View on Github external
def read_btxt2(infile,outfile):
    '''Read an idiom file and write the matching pinyin dictionary as JSON.

    ``infile`` is a UTF-8 JSON file whose top-level value is iterated to get
    the idiom strings. Each entry is stripped of surrounding whitespace and
    kept only when it is exactly four characters long. The result maps each
    kept idiom to ``lazy_pinyin(idiom)``, for example:

    {"钝口拙腮": ["dun", "kou", "zhuo", "sai"],
    "怜我怜卿": ["lian", "wo", "lian", "qing"]}

    The original author's notes report 44482 idioms in total, and 55222 for
    "version 2".

    The dictionary is written to ``outfile`` as UTF-8 JSON with non-ASCII
    characters left unescaped, and the number of idioms is printed.
    '''
    # Load everything first so the input file is closed before processing.
    with open(infile, 'r', encoding='utf8') as f:
        data = json.load(f)

    dict_data = {}
    for d in data:
        line = d.strip()
        if len(line) != 4:
            continue
        # Convert only idioms not seen yet; the first occurrence wins, and
        # duplicates no longer trigger a wasted lazy_pinyin() call.
        if line not in dict_data:
            dict_data[line] = lazy_pinyin(line)

    with open(outfile, 'w', encoding='utf8') as fo:
        json.dump(dict_data, fo, ensure_ascii=False)
    print('总共成语数:', len(dict_data))
github caomike / ppmessage / ppmessage / api / handlers / getyvobjectdetailhandler.py View on Github external
_updatetime = _o["updatetime"]

        _u = int(time.mktime(_updatetime.timetuple()))
        if _u > _timestamp:
            _rdata["type"] = YVOBJECT.AU
            _rdata["uuid"] = _o["user_uuid"]
            _rdata["email"] = _o["user_email"]
            _rdata["desc"] = None
            _rdata["fullname"] = _o["user_fullname"]

            _fn = _o["user_fullname"]
            if not isinstance(_fn, unicode):
                _fn = _fn.encode("utf-8")

            _rdata["pinyinname0"] = "".join(lazy_pinyin(_fn))
            _rdata["pinyinname1"] = "".join(list(itertools.chain.from_iterable(pinyin(_fn, style=pypinyin.INITIALS))))
            _rdata["icon"] = _o["user_icon"]
            _rdata["timestamp"] = _u
            _rdata["updated"] = True
        else:
            _rdata["updated"] = False
        return
github python-cn / slack_bot / slack_bot / plugins / utils.py View on Github external
def to_pinyin(word):
    """Return the pinyin of *word* joined into a single string.

    Byte strings are decoded as UTF-8 first; text strings are passed to
    ``lazy_pinyin`` unchanged. The syllables it returns are concatenated with
    no separator.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this matches the old
    # ``not isinstance(word, unicode)`` check for string input there, while
    # avoiding the NameError that ``unicode`` raises on Python 3.
    if isinstance(word, bytes):
        word = word.decode('utf-8')
    return ''.join(lazy_pinyin(word))
github shibing624 / pycorrector / pycorrector / cn_spell.py View on Github external
def candidates(word):
    """Group candidate words by how closely their pinyin matches *word*.

    Candidates are ``known(edit_distance_word(word, char_set))``, where
    ``char_set`` is loaded from ``char_file_path``. (Presumably these are
    dictionary words within a small edit distance of *word* -- confirm against
    the helper definitions.)

    Returns a 3-tuple of lists:
      1. candidates whose full pinyin equals that of *word*;
      2. candidates whose first pinyin syllable equals that of *word*;
      3. all remaining candidates.
    """
    target_pinyin = lazy_pinyin(word)
    char_set = load_word_dict(char_file_path)

    same_pinyin, same_first_syllable, remaining = [], [], []
    for cand in list(known(edit_distance_word(word, char_set))):
        cand_pinyin = lazy_pinyin(cand)
        # Pick the bucket by decreasing closeness of the pinyin match.
        if cand_pinyin == target_pinyin:
            bucket = same_pinyin
        elif cand_pinyin[0] == target_pinyin[0]:
            bucket = same_first_syllable
        else:
            bucket = remaining
        bucket.append(cand)

    return same_pinyin, same_first_syllable, remaining