How to use the jaconv.hira2kata function in jaconv

To help you get started, we've selected a few jaconv examples based on popular ways it is used in public projects. jaconv.hira2kata converts the hiragana characters in a string to katakana, leaving katakana, kanji and Latin characters unchanged.

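Before looking at the project examples, here is a minimal, self-contained sketch of what the function does on its own (assuming jaconv is installed via pip install jaconv; the input strings are illustrative and not taken from the projects below):

import jaconv

# hira2kata converts hiragana characters to katakana; katakana, kanji and
# Latin characters pass through unchanged.
print(jaconv.hira2kata("ともえまみ"))         # トモエマミ
print(jaconv.hira2kata("こんにちは、World"))   # コンニチハ、World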

github musyoku / chainer-speech-recognition / tools / preprocess / bigram.py
	trn_dir_list = [os.path.join(trn_base_dir, category) for category in ["core", "noncore"]]
	all_triphone_sequences = []

	for dir_idx, trn_dir in enumerate(trn_dir_list):
		trn_files = os.listdir(trn_dir)

		for file_idx, trn_filename in enumerate(trn_files):
			printr("\r{}/{} ({}/{})".format(file_idx + 1, len(trn_files), dir_idx + 1, len(trn_dir_list)))
			trn_path = os.path.join(trn_dir, trn_filename)

			with codecs.open(trn_path, "r", "utf-8") as f:
				for data in f:
					components = data.split(":")
					assert len(components) == 3
					sentence = components[-1].strip()
					sentence = jaconv.hira2kata(sentence) # force katakana conversion
					unigram_tokens = convert_sentence_to_unigram_tokens(sentence)
					for token in unigram_tokens:
						if token not in unigram_counts:
							raise Exception(token)
						unigram_counts[token] += 1
					if len(unigram_tokens) == 1:
						continue
					for first, second in zip(unigram_tokens[:-1], unigram_tokens[1:]):
						if first == u"ー":
							continue
						if second == u"ー":
							continue
						key = first + second
						if key not in bigram_counts:
							raise Exception(key)
						bigram_counts[key] += 1
github yoriyuki / nksnd / nksnd / utils / words.py
def katakana(string):
    return jaconv.hira2kata(string)
github marcan / blitzloop / blitzloop / web.py
def normalize(s):
    val = jaconv.h2z(s)
    val = jaconv.hira2kata(val)
    val = val.translate(normalize_tbl)
    return val
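The blitzloop normalize() above chains several conversions; normalize_tbl is a project-specific translation table, so the sketch below keeps only the two jaconv calls to show their combined effect (an illustrative simplification, not the project's actual behaviour):

import jaconv

def to_fullwidth_katakana(s):
    # h2z with default arguments widens half-width katakana to full-width;
    # hira2kata then converts any hiragana to katakana.
    s = jaconv.h2z(s)
    return jaconv.hira2kata(s)

print(to_fullwidth_katakana("ﾊﾛｰ、せかい"))  # ハロー、セカイ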
github r9y9 / deepvoice3_pytorch / deepvoice3_pytorch / frontend / jp / __init__.py
def text_to_sequence(text, p=0.0):
    for c in [" ", " ", "「", "」", "『", "』", "・", "【", "】",
              "(", ")", "(", ")"]:
        text = text.replace(c, "")
    text = text.replace("!", "!")
    text = text.replace("?", "?")

    text = normalize_delimitor(text)
    text = jaconv.normalize(text)
    if p > 0:
        text = mix_pronunciation(text, p)
    text = jaconv.hira2kata(text)
    text = add_punctuation(text)

    return [ord(c) for c in text] + [_eos]  # EOS
github musyoku / chainer-speech-recognition / asr / data / processing.py
			logmel, delta, delta_delta = fft.compute_deltas(logmel)

			logmel = logmel.T
			delta = delta.T
			delta_delta = delta_delta.T

			if logmel.shape[1] > max_feature_length:
				max_feature_length = logmel.shape[1]
			if len(sentence) > max_sentence_length:
				max_sentence_length = len(sentence)

			if logmel.shape[1] == 0:
				continue

			audio_features.append((logmel, delta, delta_delta))
			sentence = jaconv.hira2kata(sentence) # force katakana conversion
			sentences.append(sentence)

		assert max_feature_length > 0
		return audio_features, sentences, max_feature_length, max_sentence_length
github hash2430 / dv3_world / deepvoice3_pytorch / frontend / jp / __init__.py
def text_to_sequence(text, p=0.0):
    for c in [" ", " ", "「", "」", "『", "』", "・", "【", "】",
              "(", ")", "(", ")"]:
        text = text.replace(c, "")
    text = text.replace("!", "!")
    text = text.replace("?", "?")

    text = normalize_delimitor(text)
    text = jaconv.normalize(text)
    if p > 0:
        text = mix_pronunciation(text, p)
    text = jaconv.hira2kata(text)
    text = add_punctuation(text)

    return [ord(c) for c in text] + [_eos]  # EOS