Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/python
# coding: utf-8
import pycantonese as pc # assuming pycantonese.py is in the same directory
##----------------------------------------------------------------------------##
## read corpus data
# Folder holding the corpus: "data_sample" (which contains data_sample.txt).
# The folder is expected to be in the same directory as this script.
dataPath = 'data_sample'
# Read the corpus once, at module load; the helpers below take a corpus
# object as an argument (presumably this one -- confirm against callers).
currentCorpus = pc.read_corpus(dataPath)
##----------------------------------------------------------------------------##
## customized functions
def printListUTF8(stringlist):
    """Print every item of *stringlist* on its own line.

    Each item is decoded as UTF-8 and the resulting text is printed.  Items
    that cannot be decoded (already-decoded text, non-string objects, or
    bytes that are not valid UTF-8) and items whose text cannot be written
    to the output stream are printed as ``repr(item)`` instead, so one bad
    item never stops the listing.
    """
    # `unicode(obj, encoding=...)` on Python 2 and `str(obj, encoding=...)`
    # on Python 3 share the same contract: decode a byte string, raise
    # TypeError for anything else, raise UnicodeDecodeError on bad bytes.
    try:
        decode = unicode
    except NameError:
        decode = str

    for word in stringlist:
        try:
            print(decode(word, encoding='utf-8'))
        except (TypeError, UnicodeError):
            # Only the decoding/encoding failures trigger the fallback; a
            # bare `except:` here would also swallow KeyboardInterrupt and
            # SystemExit.
            print(repr(word))
def final_tone(corpus, what_final_tone):
    """Search *corpus* for a given final + tone combination.

    *what_final_tone* is split into two parts: everything except its last
    character (the final) and the last character itself (the tone).  Both
    parts are handed to ``pc.search_jp`` paired with the numbers 2 and 3
    respectively, together with the mode string ``'type'``; whatever
    ``pc.search_jp`` returns is returned unchanged.

    NOTE(review): 2 and 3 presumably select the final and tone slots of a
    parsed Jyutping romanization -- confirm against pycantonese.search_jp.
    """
    rime, tone_mark = what_final_tone[:-1], what_final_tone[-1]
    criteria = [(rime, 2), (tone_mark, 3)]
    return pc.search_jp(corpus, criteria, 'type')