Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def demo2():
    """Print the detection result, then the correction result, for every item of ``x``.

    ``x`` is not defined inside this function; it is looked up from the
    enclosing scope (presumably a list of sentences -- confirm at the
    definition site).
    """
    for sentence in x:
        detected = pycorrector.detect(sentence)
        print(sentence, detected)
        corrected = pycorrector.correct(sentence)
        print(sentence, corrected)
# -*- coding: utf-8 -*-
"""
@author:XuMing(xuming624@qq.com)
@description:
"""
import pycorrector
# Run the corrector on the short ASCII input 'cv'; the call's return value is
# unpacked into two names, which are then printed side by side.
corrected_sent, detail = pycorrector.correct('cv')
print(corrected_sent, detail)
def test1():
    """Walk over ``text`` and print what ``detect`` and ``correct`` return for each item.

    ``text`` comes from the enclosing scope, not from this function.
    NOTE(review): if ``text`` is a single string rather than a list, the loop
    visits it one character at a time -- confirm which is intended.
    """
    for item in text:
        detection = pycorrector.detect(item)
        print(item, detection)
        correction = pycorrector.correct(item)
        print(item, correction)
'这是人工智能的一个分知',
'我的家乡是有名的玉米之乡',
'老师工作非常幸苦,我们要遵敬老师',
'nihao, 耐得住欲妄',
'一阙词牌名',
'我兴高彩列地去公园游玩',
'吹唐人记忆',
' 耐得住欲妄',
'母子平爱',
'不由的感叹道',
]
# Append the entries of text_list to the batch, then print the detection
# output and a before/after line for every sentence in the combined list.
error_sentences.extend(text_list)
for sentence in error_sentences:
    detected = pycorrector.detect(sentence)
    print(detected)
    corrected = pycorrector.correct(sentence)
    print("original sentence:{} => correct sentence:{}".format(sentence, corrected))
def test2():
    """Print each entry of ``x`` next to its ``detect`` output and its ``correct`` output.

    ``x`` is resolved from the enclosing scope; this function neither takes
    arguments nor returns a value.
    """
    for entry in x:
        found = pycorrector.detect(entry)
        print(entry, found)
        fixed = pycorrector.correct(entry)
        print(entry, fixed)
'这纸厚度如何?质量怎么样',
'双十一下单到现在还没发货的,',
'一但工作效率提升,需要时间在工作上也减少',
'可否送手机膜?送膜吗',
'这水用来洗脸泡脚效果如何',
'五香的不辣吧',
'这款功率真有2000w吗',
'我对于宠物出租得事非常认同,因为其实很多人喜欢宠物', # 出租的事
'有了宠物出租地方另一方面还可以题高人类对动物的了解,因为那些专业人氏可以指导我们对于动物的习惯。', # 题高 => 提高 专业人氏 => 专业人士
'三个凑皮匠胜过一个诸葛亮也有道理。', # 凑
'还有广告业是只要桌子前面坐者工作未必产生出来好的成果。',
]
# First pass: hand ./my_confusion.txt to set_custom_confusion_dict and
# ./my_custom_word.txt to set_custom_word, then print the correction of every
# sentence in error_sentences.
pycorrector.set_custom_confusion_dict(path='./my_confusion.txt')
pycorrector.set_custom_word(path='./my_custom_word.txt')
for sentence in error_sentences:
    corrected = pycorrector.correct(sentence)
    print("original sentence:{} => correct sentence:{}".format(sentence, corrected))

# Visual separator between the two passes.
print('*' * 53)

# Second pass: repeat the report after calling enable_char_error(enable=False).
# The matching enable_word_error call is left commented out, as is the
# per-sentence detect() output.
pycorrector.enable_char_error(enable=False)
# pycorrector.enable_word_error(enable=False)
for sentence in error_sentences:
    # idx_errors = pycorrector.detect(sentence)
    # print(idx_errors)
    corrected = pycorrector.correct(sentence)
    print("original sentence:{} => correct sentence:{}".format(sentence, corrected))
]
# Pass 1: for every sentence print the detect() output followed by a
# before/after line, before any custom files have been registered here.
for sentence in error_sentences:
    print(pycorrector.detect(sentence))
    corrected = pycorrector.correct(sentence)
    print("original sentence:{} => correct sentence:{}".format(sentence, corrected))

# Separator so the two passes can be told apart in the output.
print('*' * 53)

# Pass 2: register ./my_confusion.txt and ./my_custom_word.txt, then produce
# the same report again over the same sentences.
pycorrector.set_custom_confusion_dict(path='./my_confusion.txt')
pycorrector.set_custom_word(path='./my_custom_word.txt')
for sentence in error_sentences:
    print(pycorrector.detect(sentence))
    corrected = pycorrector.correct(sentence)
    print("original sentence:{} => correct sentence:{}".format(sentence, corrected))
'还有广告业是只要桌子前面坐者工作未必产生出来好的成果。',
]
# Register the custom confusion file and the custom word file before the
# first round of corrections.
pycorrector.set_custom_confusion_dict(path='./my_confusion.txt')
pycorrector.set_custom_word(path='./my_custom_word.txt')

# Round one: print a before/after line per sentence.
for text_in in error_sentences:
    text_out = pycorrector.correct(text_in)
    print("original sentence:{} => correct sentence:{}".format(text_in, text_out))

print('*' * 53)  # divider between the two rounds

# Round two: same loop, run after enable_char_error(enable=False).
# enable_word_error is not called (its line is commented out), and the
# detect() output is likewise not printed.
pycorrector.enable_char_error(enable=False)
# pycorrector.enable_word_error(enable=False)
for text_in in error_sentences:
    # idx_errors = pycorrector.detect(text_in)
    # print(idx_errors)
    text_out = pycorrector.correct(text_in)
    print("original sentence:{} => correct sentence:{}".format(text_in, text_out))
def eval_bcmi_data(data_path, verbose=False):
    """Measure sentence-level correction accuracy over a corpus file.

    Every line of ``data_path`` is stripped and passed to ``get_bcmi_corpus``,
    which yields ``(error_sentence, right_sentence, right_detail)``. Lines
    whose ``error_sentence`` is falsy are skipped. Each remaining error
    sentence is run through ``correct`` and the predicted sentence is compared
    with the reference by exact equality.

    Args:
        data_path: Path of the text file to evaluate; it is opened as UTF-8.
        verbose: When true, print the input, prediction and reference for
            every evaluated sentence, plus a final count summary.

    Returns:
        A 3-tuple ``(accuracy, right_result, wrong_result)``.
        ``accuracy`` is ``right_count / sentence_size`` over the evaluated
        sentences, or ``0.0`` when none were evaluated. ``right_result`` and
        ``wrong_result`` map each error sentence to
        ``[right_sentence, pred_sentence]``; a repeated error sentence
        overwrites its earlier entry but is still counted each time.
    """
    # Count only sentences that were actually evaluated. The counter used to
    # start at 1, which made the denominator n + 1 and understated accuracy.
    sentence_size = 0
    right_count = 0
    right_result = dict()
    wrong_result = dict()
    with open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            error_sentence, right_sentence, right_detail = get_bcmi_corpus(line)
            if not error_sentence:
                # Nothing to evaluate on this line.
                continue
            pred_sentence, pred_detail = correct(error_sentence)
            if verbose:
                print('input sentence:', error_sentence)
                print('pred sentence:', pred_sentence, pred_detail)
                print('right sentence:', right_sentence, right_detail)
            sentence_size += 1
            if right_sentence == pred_sentence:
                right_count += 1
                right_result[error_sentence] = [right_sentence, pred_sentence]
            else:
                wrong_result[error_sentence] = [right_sentence, pred_sentence]
    if verbose:
        print('right count:', right_count, ';sentence size:', sentence_size)
    # Explicit guard replaces the old padded denominator: an empty corpus
    # still yields 0.0 instead of raising ZeroDivisionError.
    accuracy = right_count / sentence_size if sentence_size else 0.0
    return accuracy, right_result, wrong_result