Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# same first pinyin
confusion_char_set = _get_confusion_set(word[0])
confusion = [i + word[1:] for i in confusion_char_set if i]
candidates_2_order.extend(confusion)
# same last pinyin
confusion_char_set = _get_confusion_set(word[-1])
confusion = [word[:-1] + i for i in confusion_char_set]
candidates_2_order.extend(confusion)
if len(word) > 2:
# same mid pinyin
confusion_char_set = _get_confusion_set(word[1])
confusion = [word[0] + i + word[2:] for i in confusion_char_set]
candidates_3_order.extend(confusion)
# add all confusion word list
confusion_word_set = set(candidates_1_order + candidates_2_order + candidates_3_order)
confusion_word_list = [item for item in confusion_word_set if is_chinese_string(item)]
confusion_sorted = sorted(confusion_word_list, key=lambda k: \
get_frequency(k), reverse=True)
return confusion_sorted[:len(confusion_word_list) // fraction + 1]
def _generate_items(word, fraction=2):
if not is_chinese_string(word):
return []
candidates_1_order = []
candidates_2_order = []
candidates_3_order = []
candidate_words = list(_known(_edit_distance_word(word, cn_char_set)))
for candidate_word in candidate_words:
if lazy_pinyin(candidate_word) == lazy_pinyin(word):
# same pinyin
candidates_1_order.append(candidate_word)
if len(word) == 1:
# same pinyin
confusion_char_set = _get_confusion_set(word[0])
confusion = [i for i in confusion_char_set if i]
candidates_2_order.extend(confusion)
if len(word) > 1:
# same first pinyin