def _tokenize(self, text):
    # Pre-clean
    text = text.strip()

    # Apply pre-processors
    for pp in self.pre_processor_funcs:
        log.debug("pre-processing: %s", pp)
        text = pp(text)

    # Short-circuit: the whole text already fits in one request
    if _len(text) <= self.GOOGLE_TTS_MAX_CHARS:
        return _clean_tokens([text])

    # Tokenize
    log.debug("tokenizing: %s", self.tokenizer_func)
    tokens = self.tokenizer_func(text)

    # Clean
    tokens = _clean_tokens(tokens)

    # Minimize
    min_tokens = []
    for t in tokens:
        min_tokens += _minimize(t, ' ', self.GOOGLE_TTS_MAX_CHARS)

    return min_tokens
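For context, _tokenize is a private helper on the gTTS class: it strips and pre-processes the input, returns early if the cleaned text already fits within GOOGLE_TTS_MAX_CHARS, and otherwise tokenizes, cleans, and minimizes the text into chunks short enough for the API. The sketch below is a minimal illustration of that behavior, not part of the library's documented API; it assumes a recent gtts release where the constructor accepts text and lang, and it calls the private method purely to inspect the resulting chunks.

# Minimal sketch (assumes a recent gtts release); _tokenize is private,
# so this is for inspection/illustration only.
from gtts import gTTS

long_text = "This is a deliberately long sentence used for illustration. " * 20

tts = gTTS(long_text, lang="en")       # building the object does not perform the TTS request
chunks = tts._tokenize(long_text)      # list of strings, each within GOOGLE_TTS_MAX_CHARS

for i, chunk in enumerate(chunks):
    # Show the index, length, and a preview of each chunk
    print(i, len(chunk), repr(chunk[:40]))

Short inputs come back as a single cleaned token (the early-return branch), while longer inputs are split at the configured tokenizer's punctuation boundaries and then re-merged up to the character limit by _minimize.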