Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _add_lab(txtlines, wav_dir_path):
    """Write one ``.lab`` pinyin transcript per input line.

    Each line is split on a single space into a file id and its text.
    ``#<digit>`` markers are stripped from the text (presumably prosody
    labels -- confirm against the corpus format), the text is converted with
    ``pinyin(..., style=Style.TONE3)``, and every syllable that does not end
    in a digit gets ``'5'`` appended.  The space-joined syllables are written
    to ``<wav_dir_path>/<file id>.lab``.

    Params:
        txtlines: iterable of strings of the form ``"<file id> <text>"``
        wav_dir_path: directory in which the ``.lab`` files are created

    Raises:
        ValueError: if a line does not split into exactly two fields.
    """
    logger = logging.getLogger('mtts')
    for line in txtlines:
        numstr, txt = line.split(' ')
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        txt = re.sub(r'#\d', '', txt)
        pinyin_list = pinyin(txt, style=Style.TONE3)
        new_pinyin_list = []
        for item in pinyin_list:
            if not item or not item[0]:
                # The original message used a named placeholder with a
                # positional argument, which raised KeyError instead of
                # logging; it then fell through to item[0] and raised
                # IndexError.  Log and skip the unusable entry instead.
                logger.warning('%s do not generate right pinyin', numstr)
                continue
            phone = item[0]
            if not phone[-1].isdigit():
                # No tone digit present: mark the syllable with tone 5.
                phone += '5'
            new_pinyin_list.append(phone)
        lab_file = os.path.join(wav_dir_path, numstr + '.lab')
        with open(lab_file, 'w') as oid:
            oid.write(' '.join(new_pinyin_list))
# Fragment of a per-line text preprocessing loop.  The enclosing function and
# the remainder of the "phoneme" branch are outside this view.
for line in lines:
# Everything after the first whitespace-separated field is treated as text.
check_text = "".join(line.split()[1:])
# Skip any line whose text contains ASCII letters.
english_check = re.search('[a-zA-Z]', check_text)
if english_check:
continue
#print(line)
# Unpacking requires exactly two whitespace-separated fields; any other
# count raises ValueError.
fname, text = line.strip().split()
#text = text_normalize(text)
# Skip texts longer than the configured maximum length.
if len(text) > hp.max_len:
continue
# NOTE(review): the original comment here read "bopomofo base", but this
# branch converts to pinyin with Style.TONE3 -- confirm the intended wording.
if hp.input_mode == "pinyin" or hp.input_mode == "pinyin_syl":
text_pinyin = pinyin(text, style=Style.TONE3)
# Rebuild `text` from the first pinyin candidate of each entry.
text = ""
for t in text_pinyin:
if hp.withtone:
# "pinyin" mode concatenates syllables directly; "pinyin_syl" mode
# puts a space before every syllable.
if hp.input_mode == "pinyin":
text = text + t[0]
else:
text = text + " " + t[0]
else:
# Strip the digits from the syllable.
tmp = ''.join([i for i in t[0] if not i.isdigit()])
if hp.input_mode == "pinyin":
text = text + tmp
else:
# NOTE(review): this appends t[0] (digits included) rather than tmp even
# though hp.withtone is false -- looks unintended; confirm.
text = text + " " + t[0]
elif hp.input_mode == "phoneme":
t_tmp = ""
# The body of this loop is cut off in this view.
for t in text:
def trans_aishell_to_pinyin(word_path, pinyin_path):
    """Convert a text file to TONE3 pinyin, writing one output line per input line.

    Every input line is split on single spaces and the resulting list is
    passed to ``pinyin(..., style=Style.TONE3)``.  Each returned entry is
    space-joined; an entry containing no digit gets ``'5'`` appended.  The
    entries are then joined with single spaces and written followed by a
    newline.

    Params:
        word_path: path of the UTF-8 text file to convert
        pinyin_path: path of the UTF-8 text file that receives the pinyin
    """
    # Context managers guarantee both files are closed (and the output is
    # flushed) even if conversion fails part-way; the original never closed
    # either handle.  The input is only read, so plain 'r' is sufficient.
    with open(word_path, 'r', encoding='UTF-8') as textobj, \
            open(pinyin_path, 'w', encoding='UTF-8') as savefile:
        for x in textobj:
            textlabel = x.strip('\n').split(' ')
            syllables = []
            for i in pinyin(textlabel, style=Style.TONE3):
                str1 = " ".join(i)
                if not re.search(r'\d', str1):
                    # No digit anywhere in the entry: mark it with tone 5.
                    str1 += '5'
                syllables.append(str1)
            # Save the generated line.
            savefile.write(" ".join(syllables) + "\n")
def _get_pinyin(text, std=True, pb=False):
'''
Convert normalized text to pinyin (this snippet is truncated; only the
start of the prosody-label branch is visible).

Params:
text: string, normalized sentences
std: boolean, use the standard pinyin style (pypinyin TONE3), default: True
pb: boolean, prosody label, default: False
Returns:
pinyin: string (per the original docstring; the return statement is
outside this view)
'''
if std:
style = pypinyin.Style.TONE3
else:
# NOTE(review): presumably a custom style registered elsewhere -- confirm.
style = 'nonstd'
text = text.strip()
# Default trailing punctuation; replaced below when the text ends with one
# of ",.?!", in which case that character is also removed from the text.
punctuation = ', '
# Slicing with [-1:] yields '' for empty text instead of raising IndexError.
if text[-1:] in list(',.?!'):
punctuation = text[-1:] + ' '
text = text[:-1]
pinyin = []
if pb:
# Segment the text with jieba and convert each segment separately.
for word in jieba.cut(text):
for p in pypinyin.pinyin(word, style):
# Syllables not ending in a tone digit 1-4 get '5' appended.
if p[0][-1] not in ['1', '2', '3', '4']:
pinyin.append(p[0] + '5')
def txt2pinyin(txt):
    """Return the per-syllable results for ``txt``.

    ``txt`` is converted with ``pinyin(..., style=Style.TONE3)``; the first
    candidate of each entry is passed through ``pinyinformat`` and then
    ``seprate_syllabel``, and the results are collected in order.
    """
    # A disabled Python 2 str/unicode decoding branch used to sit here; the
    # text is handed to pinyin() unchanged.
    syllables = pinyin(txt, style=Style.TONE3)
    return [seprate_syllabel(pinyinformat(entry[0])) for entry in syllables]
from __future__ import unicode_literals
from argparse import ArgumentParser
import logging
import sys
import pypinyin
from pypinyin.compat import PY2
style_map = {
'NORMAL': pypinyin.Style.NORMAL,
'zhao': pypinyin.Style.NORMAL,
'TONE': pypinyin.Style.TONE,
'zh4ao': pypinyin.Style.TONE,
'TONE2': pypinyin.Style.TONE2,
'zha4o': pypinyin.Style.TONE2,
'TONE3': pypinyin.Style.TONE3,
'zhao4': pypinyin.Style.TONE3,
'INITIALS': pypinyin.Style.INITIALS,
'zh': pypinyin.Style.INITIALS,
'FIRST_LETTER': pypinyin.Style.FIRST_LETTER,
'z': pypinyin.Style.FIRST_LETTER,
'FINALS': pypinyin.Style.FINALS,
'ao': pypinyin.Style.FINALS,
'FINALS_TONE': pypinyin.Style.FINALS_TONE,
'4ao': pypinyin.Style.FINALS_TONE,
'FINALS_TONE2': pypinyin.Style.FINALS_TONE2,
'a4o': pypinyin.Style.FINALS_TONE2,
'FINALS_TONE3': pypinyin.Style.FINALS_TONE3,
'ao4': pypinyin.Style.FINALS_TONE3,
'BOPOMOFO': pypinyin.Style.BOPOMOFO,
'BOPOMOFO_FIRST': pypinyin.Style.BOPOMOFO_FIRST,
'CYRILLIC': pypinyin.Style.CYRILLIC,