# Secure your code as it's written. Use Snyk Code to scan source code in
# minutes — no build needed — and fix issues immediately.
def split_furigana(text):
    """Tokenize *text* with MeCab and pair each kanji word with its reading.

    Returns a list of tuples: ``(surface, reading)`` pairs (via
    ``split_okurigana``) for tokens containing kanji, and 1-tuples
    ``(surface,)`` for everything else.  Relies on the module-level helpers
    ``is_kanji`` and ``split_okurigana`` plus the ``MeCab`` and ``jaconv``
    packages.  NOTE(review): the original ``def`` line was missing from this
    chunk; the signature is reconstructed — confirm against callers.
    """
    mecab = MeCab.Tagger("-Ochasen")
    mecab.parse('')  # MeCab quirk: parse an empty string once so .surface is populated
    node = mecab.parseToNode(text)
    ret = []
    while node is not None:
        origin = node.surface  # the original token text
        if not origin:
            # BOS/EOS and similar nodes have an empty surface — skip them.
            node = node.next
            continue
        features = node.feature.split(",")
        # Only tokens containing kanji need a reading attached; tokens MeCab
        # could not analyze have no 8th feature field (or carry '*'), and the
        # original code raised IndexError / emitted garbage for them — fall
        # back to emitting the bare surface instead.
        if (any(is_kanji(ch) for ch in origin)
                and len(features) > 7 and features[7] != '*'):
            hiragana = jaconv.kata2hira(features[7])  # reading comes back in katakana
            ret.extend(split_okurigana(origin, hiragana))
        else:
            ret.append((origin,))
        node = node.next
    return ret
# --- kanji label map (reserved labels depend on the model type) ---
with open(kanji_map_file_path, 'w') as f:
    # Reserve indices for the special labels; strip them from the raw
    # character set so they are not emitted twice.
    all_char_set.discard('N')
    all_char_set.discard('Z')
    all_char_set.discard('_')
    if model == 'attention':
        # attention models additionally reserve the SOS/EOS markers
        all_char_set.discard('<')
        all_char_set.discard('>')
    # Everything that is neither hiragana nor katakana counts as a kanji label.
    kanji_set = {char for char in all_char_set
                 if not is_hiragana(char) and not is_katakana(char)}
    # Also include every kana and its hiragana form so readings are coverable.
    for kana in kana_list:
        kanji_set.add(kana)
        kanji_set.add(jaconv.kata2hira(kana))
    if model == 'ctc':
        kanji_list = ['_', 'NZ'] + sorted(kanji_set)
    elif model == 'attention':
        kanji_list = ['_', '<', '>', 'NZ'] + sorted(kanji_set)
    else:
        # BUG FIX: previously an unrecognized model fell through and the
        # write loop below died with a NameError on kanji_list.
        raise ValueError('unexpected model type: %s' % model)
    for i, kanji in enumerate(kanji_list):
        f.write('%s %s\n' % (kanji, str(i)))
# kana
with open(kana_map_file_path, 'w') as f:
    # Prepend whichever reserved labels this model type uses (none for an
    # unrecognized model, matching the original fall-through behavior).
    reserved = {'ctc': ['_', 'NZ'],
                'attention': ['_', '<', '>', 'NZ']}.get(model, [])
    kana_list = reserved + kana_list
    f.writelines('%s %s\n' % (kana, index)
                 for index, kana in enumerate(kana_list))
# kanji
with open(kanji_map_file_path, 'w') as f:
    # Labels whose indices are reserved up front — drop them from the raw set.
    for reserved_char in ('N', 'Z', '_', '<', '>'):
        all_char_set.discard(reserved_char)
    # Anything that is neither hiragana nor katakana is treated as a kanji label.
    kanji_set = {c for c in all_char_set
                 if not is_hiragana(c) and not is_katakana(c)}
    # Include every kana plus its hiragana form as well.
    for kana in kana_list:
        kanji_set.add(kana)
        kanji_set.add(jaconv.kata2hira(kana))
    # NOTE: frequent labels get the smallest indices.
    kanji_list = ['_', '<', '>', 'NZ'] + sorted(kanji_set)
    f.writelines('%s %s\n' % (kanji, index)
                 for index, kanji in enumerate(kanji_list))
# kana
with open(kana_map_file_path, 'w') as f:
    # Reserved labels first, then the kana inventory, one "label index" per line.
    kana_list = ['_', '<', '>', 'NZ'] + kana_list
    f.writelines('%s %s\n' % (kana, index)
                 for index, kana in enumerate(kana_list))
# phone
with open(phone_map_file_path, 'w') as f:
    # Reserved labels first, then the sorted phone inventory.
    phone_list = ['_', '<', '>', 'NZ'] + sorted(phone_set)
    f.writelines('%s %s\n' % (phone, index)
                 for index, phone in enumerate(phone_list))
import jaconv
from WaniKaniTools.api import APIv2
if __name__ == '__main__':
    api_v2 = APIv2()
    # Fetch all vocabulary subjects; the WaniKani v2 API pages its results.
    result = api_v2.GET('subjects', params={'types': 'vocabulary'})
    # print(json.dumps(result, indent=4))
    while True:
        for data in result['data']:
            # Collect the distinct hiragana readings for this vocabulary item.
            reading_array = set()
            for reading in data["data"]["readings"]:
                reading_array.add(jaconv.kata2hira(reading['reading']))
            # Only items with more than one distinct reading are interesting.
            if len(reading_array) == 1:
                continue
            # Primary English meanings only.
            meaning_array = [meaning['meaning']
                             for meaning in data["data"]["meanings"]
                             if meaning['primary']]
            to_print = (
                data["data"]["characters"] if "characters" in data["data"] else data["data"]["character"],
                # sorted() so the output is deterministic (sets have no order)
                ', '.join(sorted(reading_array)),
                ', '.join(meaning_array)
            )
            print('\t'.join(to_print))
        # BUG FIX: the original `while True` never advanced past the first
        # page, re-printing the same data forever.  Follow the pagination
        # cursor and stop at the last page.
        next_url = result.get('pages', {}).get('next_url')
        if not next_url:
            break
        # NOTE(review): assumes APIv2.GET accepts the absolute next_url
        # returned by the API — confirm against WaniKaniTools.api.
        result = api_v2.GET(next_url)