# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def legacy_all_punctuation():  # pragma: no cover b/c tested but Coveralls: ¯\_(ツ)_/¯
    """Match all punctuation.

    Use as only tokenizer case to mimic gTTS 1.x tokenization.
    """
    # Build one alternation over every punctuation character known to gTTS.
    builder = RegexBuilder(
        pattern_args=symbols.ALL_PUNC,
        pattern_func=lambda c: u"{}".format(c))
    return builder.regex
# -*- coding: utf-8 -*-
from gtts.tokenizer.symbols import ALL_PUNC as punc
from string import whitespace as ws
import re
# Pre-compiled at import time: anchors (^...$) ensure the whole line must
# consist solely of escaped punctuation/whitespace characters (or be empty).
_ALL_PUNC_OR_SPACE = re.compile(
    u"^[{}]*$".format(re.escape(punc + ws)))
"""Regex that matches if an entire line is only comprised
of whitespace and punctuation
"""
def _minimize(the_string, delim, max_size):
"""Recursively split a string in the largest chunks
possible from the highest position of a delimiter all the way
to a maximum size
Args:
the_string (string): The string to split.
delim (string): The delimiter to split on.
max_size (int): The maximum size of a chunk.
def legacy_all_punctuation():  # pragma: no cover b/c tested but Coveralls: ¯\_(ツ)_/¯
    """Match all punctuation.

    Use as only tokenizer case to mimic gTTS 1.x tokenization.
    """
    # Every punctuation symbol becomes its own pattern alternative.
    return RegexBuilder(
        pattern_args=symbols.ALL_PUNC,
        pattern_func=lambda symbol: u"{}".format(symbol)).regex
def other_punctuation():
    """Match other punctuation.

    Match other punctuation to split on; punctuation that naturally
    inserts a break in speech.
    """
    # Everything in ALL_PUNC except the marks handled by dedicated cases
    # (tone marks, period/comma, colon).
    excluded = (set(symbols.TONE_MARKS)
                | set(symbols.PERIOD_COMMA)
                | set(symbols.COLON))
    remaining = ''.join(set(symbols.ALL_PUNC) - excluded)
    return RegexBuilder(
        pattern_args=remaining,
        pattern_func=lambda symbol: u"{}".format(symbol)).regex
def other_punctuation():
    """Match other punctuation.

    Match other punctuation to split on; punctuation that naturally
    inserts a break in speech.
    """
    # Everything in ALL_PUNC except the marks handled by dedicated cases
    # (tone marks, period/comma).
    excluded = set(symbols.TONE_MARKS) | set(symbols.PERIOD_COMMA)
    remaining = ''.join(set(symbols.ALL_PUNC) - excluded)
    return RegexBuilder(
        pattern_args=remaining,
        pattern_func=lambda symbol: u"{}".format(symbol)).regex