How to use the gtts.tokenizer.symbols.ALL_PUNC constant in gTTS

gtts.tokenizer.symbols.ALL_PUNC is a string of every punctuation character gTTS's tokenizer recognizes. To help you get started, we've selected a few gTTS examples based on popular ways it is used in public projects.


From luoliyan/chinese-support-redux (which vendors gTTS) — chinese/lib/gtts/tokenizer/tokenizer_cases.py (view on GitHub)
from gtts.tokenizer import RegexBuilder, symbols


def legacy_all_punctuation():  # pragma: no cover b/c tested but Coveralls: ¯\_(ツ)_/¯
    """Match all punctuation.

    Use as only tokenizer case to mimic gTTS 1.x tokenization.
    """
    punc = symbols.ALL_PUNC
    return RegexBuilder(
        pattern_args=punc,
        pattern_func=lambda x: u"{}".format(x)).regex
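To see what this case produces, feed it to gTTS's Tokenizer. A minimal sketch, assuming a gTTS release where gtts.tokenizer exposes Tokenizer and the tokenizer_cases module; the exact tokens vary by version:

from gtts.tokenizer import Tokenizer, tokenizer_cases

# Split on every character in ALL_PUNC, mimicking gTTS 1.x
t = Tokenizer([tokenizer_cases.legacy_all_punctuation])
print(t.run("Hello, world! How are you?"))
# Roughly: ['Hello', ' world', ' How are you', ''] -- note that the
# comma splits here, unlike with other_punctuation below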
From pndurette/gTTS — gtts/utils.py (view on GitHub)
# -*- coding: utf-8 -*-
from gtts.tokenizer.symbols import ALL_PUNC as punc
from string import whitespace as ws
import re

_ALL_PUNC_OR_SPACE = re.compile(u"^[{}]*$".format(re.escape(punc + ws)))
"""Regex that matches if an entire line is only comprised
of whitespace and punctuation

"""


def _minimize(the_string, delim, max_size):
    """Recursively split a string in the largest chunks
    possible from the highest position of a delimiter all the way
    to a maximum size

    Args:
        the_string (string): The string to split.
        delim (string): The delimiter to split on.
        max_size (int): The maximum size of a chunk.

    Returns:
        list: the minimized string in tokens
    """
    # (The scraped snippet ends at the docstring; the body below follows
    # the upstream gtts/utils.py implementation.)
    # Remove `delim` from the start of `the_string` to avoid
    # recursing forever on an empty first chunk
    if the_string.startswith(delim):
        the_string = the_string[len(delim):]

    if len(the_string) > max_size:
        try:
            # Find the highest index of `delim` within the first `max_size` chars
            idx = the_string.rindex(delim, 0, max_size)
        except ValueError:
            # No `delim` found: hard-split at `max_size`
            idx = max_size
        # Keep the chunk up to `idx` and recurse on the rest
        return [the_string[:idx]] + _minimize(the_string[idx:], delim, max_size)
    else:
        return [the_string]
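
Both helpers are internal (note the leading underscores), but easy to exercise. A quick sketch; the expected outputs are illustrative:

from gtts.utils import _ALL_PUNC_OR_SPACE, _minimize

# A line of nothing but punctuation and whitespace matches,
# so gTTS can skip sending it to the API
print(bool(_ALL_PUNC_OR_SPACE.match("!., ... ")))  # True
print(bool(_ALL_PUNC_OR_SPACE.match("hello!")))    # False

# Split into chunks of at most 15 characters, preferring spaces
print(_minimize("the quick brown fox jumps", " ", 15))
# ['the quick', 'brown fox jumps']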
From luoliyan/chinese-support-redux — chinese/lib/gtts/tokenizer/tokenizer_cases.py (view on GitHub)
from gtts.tokenizer import RegexBuilder, symbols


def other_punctuation():
    """Match other punctuation.

    Match other punctuation to split on; punctuation that naturally
    inserts a break in speech.

    """
    punc = ''.join(
        set(symbols.ALL_PUNC) -
        set(symbols.TONE_MARKS) -
        set(symbols.PERIOD_COMMA) -
        set(symbols.COLON))
    return RegexBuilder(
        pattern_args=punc,
        pattern_func=lambda x: u"{}".format(x)).regex
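Note the extra set(symbols.COLON) subtraction in this vendored copy: it keeps the full-width colon "：" (common in Chinese text) from forcing a speech break. The upstream snippet below predates that change and still splits on the colon.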
From pndurette/gTTS — gtts/tokenizer/tokenizer_cases.py (view on GitHub)
from gtts.tokenizer import RegexBuilder, symbols


def other_punctuation():
    """Match other punctuation.

    Match other punctuation to split on; punctuation that naturally
    inserts a break in speech.

    """
    punc = ''.join((
        set(symbols.ALL_PUNC) -
        set(symbols.TONE_MARKS) -
        set(symbols.PERIOD_COMMA)))
    return RegexBuilder(
        pattern_args=punc,
        pattern_func=lambda x: u"{}".format(x)).regex
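
As a quick check of the semantics, here is a sketch using the tokenizer_cases module shipped with gTTS; token boundaries are illustrative and depend on which variant your version ships:

from gtts.tokenizer import Tokenizer, tokenizer_cases

t = Tokenizer([tokenizer_cases.other_punctuation])
print(t.run("first; second, still second"))
# Roughly: ['first', ' second, still second'] -- the semicolon
# splits, but the comma does not, because PERIOD_COMMA is
# subtracted from ALL_PUNC in this case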