How to use the jiwer.transforms.AbstractTransform class in jiwer

To help you get started, we’ve selected a few jiwer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jitsi / asr-wer / jiwer / transforms.py View on Github external
class BaseRemoveTransform(AbstractTransform):
    """
    Shared base for transforms which replace every occurrence of a set of
    tokens in a sentence with one fixed replacement token.
    """

    def __init__(self, tokens_to_remove: List[str], replace_token=""):
        """
        :param tokens_to_remove: the substrings to look for
        :param replace_token: the text put in place of each occurrence.
        Default is the empty string (the token is deleted).
        """
        self.replace_token = replace_token
        self.tokens_to_remove = tokens_to_remove

    def process_string(self, s: str):
        """
        :param s: the sentence to clean
        :return: the sentence after replacing the tokens, one after another
        """
        cleaned = s
        for token in self.tokens_to_remove:
            cleaned = cleaned.replace(token, self.replace_token)
        return cleaned

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to clean
        :return: a new list with `process_string` applied to each sentence
        """
        return list(map(self.process_string, inp))


class SentencesToListOfWords(AbstractTransform):
    def __init__(self, word_delimiter: str = " "):
        """
        Transforms one or more sentences into a list of words. A sentence is
        assumed to be a string, where words are delimited by a token
        (such as ` `, space). Each string is expected to contain only a single sentence.

        :param word_delimiter: the character which delimits words. Default is ` ` (space).
        """
        self.word_delimiter = word_delimiter

    def process_string(self, s: str):
        """
        :param s: the sentence to split
        :return: the parts of the sentence found between the word delimiter
        """
        delimiter = self.word_delimiter
        return s.split(delimiter)

    def process_list(self, inp: List[str]):
        words = []
github jitsi / asr-wer / jiwer / transforms.py View on Github external
class Strip(AbstractTransform):
    """
    Removes leading and trailing whitespace from a sentence.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to trim
        :return: the sentence without surrounding whitespace
        """
        trimmed = s.strip()
        return trimmed


class RemoveEmptyStrings(AbstractTransform):
    """
    When given a list, drops the sentences which are empty or consist only
    of whitespace.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to trim
        :return: the sentence without surrounding whitespace
        """
        trimmed = s.strip()
        return trimmed

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to filter
        :return: the sentences, unmodified, whose trimmed form is not empty
        """
        return list(
            filter(lambda sentence: self.process_string(sentence) != "", inp)
        )


class ExpandCommonEnglishContractions(AbstractTransform):
    def process_string(self, s: str):
        # definitely a non exhaustive list

        # specific words
        s = re.sub(r"won't", "will not", s)
        s = re.sub(r"can\'t", "can not", s)
        s = re.sub(r"let\'s", "let us", s)

        # general attachments
        s = re.sub(r"n\'t", " not", s)
        s = re.sub(r"\'re", " are", s)
        s = re.sub(r"\'s", " is", s)
        s = re.sub(r"\'d", " would", s)
        s = re.sub(r"\'ll", " will", s)
        s = re.sub(r"\'t", " not", s)
        s = re.sub(r"\'ve", " have", s)
github jitsi / asr-wer / jiwer / transforms.py View on Github external
class RemoveMultipleSpaces(AbstractTransform):
    """
    Collapses every run of two or more whitespace characters into a single
    space.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to normalize
        :return: the sentence with each whitespace run replaced by one space
        """
        whitespace_run = r"\s\s+"
        return re.sub(whitespace_run, " ", s)

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to normalize
        :return: a new list with `process_string` applied to each sentence
        """
        return list(map(self.process_string, inp))


class Strip(AbstractTransform):
    """
    Removes leading and trailing whitespace from a sentence.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to trim
        :return: the sentence without surrounding whitespace
        """
        trimmed = s.strip()
        return trimmed


class RemoveEmptyStrings(AbstractTransform):
    """
    When given a list, drops the sentences which are empty or consist only
    of whitespace.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to trim
        :return: the sentence without surrounding whitespace
        """
        trimmed = s.strip()
        return trimmed

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to filter
        :return: the sentences, unmodified, whose trimmed form is not empty
        """
        return list(
            filter(lambda sentence: self.process_string(sentence) != "", inp)
        )


class ExpandCommonEnglishContractions(AbstractTransform):
    def process_string(self, s: str):
        # definitely a non exhaustive list

        # specific words
        s = re.sub(r"won't", "will not", s)
        s = re.sub(r"can\'t", "can not", s)
        s = re.sub(r"let\'s", "let us", s)
github jitsi / asr-wer / jiwer / transforms.py View on Github external
def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to transform
        :return: a new list with `process_string` applied to each sentence
        """
        return list(map(self.process_string, inp))


class Compose(object):
    """
    Chains several transforms into a single callable.
    """

    def __init__(self, transforms: List[AbstractTransform]):
        """
        :param transforms: the transforms to apply, in order
        """
        self.transforms = transforms

    def __call__(self, text):
        """
        :param text: the input handed to the first transform
        :return: the output of the last transform, each transform having
        received the output of the one before it
        """
        result = text
        for transform in self.transforms:
            result = transform(result)
        return result


class BaseRemoveTransform(AbstractTransform):
    """
    Shared base for transforms which replace every occurrence of a set of
    tokens in a sentence with one fixed replacement token.
    """

    def __init__(self, tokens_to_remove: List[str], replace_token=""):
        """
        :param tokens_to_remove: the substrings to look for
        :param replace_token: the text put in place of each occurrence.
        Default is the empty string (the token is deleted).
        """
        self.replace_token = replace_token
        self.tokens_to_remove = tokens_to_remove

    def process_string(self, s: str):
        """
        :param s: the sentence to clean
        :return: the sentence after replacing the tokens, one after another
        """
        cleaned = s
        for token in self.tokens_to_remove:
            cleaned = cleaned.replace(token, self.replace_token)
        return cleaned

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to clean
        :return: a new list with `process_string` applied to each sentence
        """
        return list(map(self.process_string, inp))


class SentencesToListOfWords(AbstractTransform):
    def __init__(self, word_delimiter: str = " "):
github jitsi / asr-wer / jiwer / transforms.py View on Github external
class RemovePunctuation(BaseRemoveTransform):
    """
    Removes every character listed in `string.punctuation` from a sentence.
    """

    def __init__(self):
        # One token per punctuation character; the base class default
        # replacement is used.
        super().__init__(list(string.punctuation))


class RemoveMultipleSpaces(AbstractTransform):
    """
    Collapses every run of two or more whitespace characters into a single
    space.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to normalize
        :return: the sentence with each whitespace run replaced by one space
        """
        whitespace_run = r"\s\s+"
        return re.sub(whitespace_run, " ", s)

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to normalize
        :return: a new list with `process_string` applied to each sentence
        """
        return list(map(self.process_string, inp))


class Strip(AbstractTransform):
    """
    Removes leading and trailing whitespace from a sentence.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to trim
        :return: the sentence without surrounding whitespace
        """
        trimmed = s.strip()
        return trimmed


class RemoveEmptyStrings(AbstractTransform):
    """
    When given a list, drops the sentences which are empty or consist only
    of whitespace.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to trim
        :return: the sentence without surrounding whitespace
        """
        trimmed = s.strip()
        return trimmed

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to filter
        :return: the sentences, unmodified, whose trimmed form is not empty
        """
        return list(
            filter(lambda sentence: self.process_string(sentence) != "", inp)
        )


class ExpandCommonEnglishContractions(AbstractTransform):
    def process_string(self, s: str):
        # definitely a non exhaustive list
github jitsi / asr-wer / jiwer / transforms.py View on Github external
s = re.sub(key, value, s)

        return s


class ToLowerCase(AbstractTransform):
    """
    Converts a sentence to lower case.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to convert
        :return: the lower-cased sentence
        """
        lowered = s.lower()
        return lowered


class ToUpperCase(AbstractTransform):
    """
    Converts a sentence to upper case.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to convert
        :return: the upper-cased sentence
        """
        uppered = s.upper()
        return uppered


class RemoveKaldiNonWords(AbstractTransform):
    """
    Removes bracketed tokens from a sentence: any text which opens with `<`
    or `[` and runs up to the next `>` or `]`.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to clean
        :return: the sentence with every bracketed token deleted
        """
        bracketed_token = r"[<\[][^>\]]*[>\]]"
        return re.sub(bracketed_token, "", s)
github jitsi / asr-wer / jiwer / transforms.py View on Github external
return s


class SubstituteWords(AbstractTransform):
    """
    Replaces whole words in a sentence by other words. Each key of the
    mapping is matched literally and only between word boundaries; every
    match is replaced by the corresponding value, inserted verbatim.
    """

    def __init__(self, substitutions: Mapping[str, str]):
        """
        :param substitutions: mapping from the word to look for to the text
        which replaces it
        """
        self.substitutions = substitutions

    def process_string(self, s: str):
        """
        :param s: the sentence to rewrite
        :return: the sentence after applying every substitution, in the
        iteration order of the mapping
        """
        for key, value in self.substitutions.items():
            pattern = r"\b{}\b".format(re.escape(key))
            # Use a callable replacement so the value is inserted as-is.
            # Handing the string itself to re.sub would make it a template:
            # a backslash in the replacement word would be read as an escape
            # or group reference and could corrupt the output or raise.
            s = re.sub(pattern, lambda _match, literal=value: literal, s)

        return s


class SubstituteRegexes(AbstractTransform):
    """
    Applies a series of regular expression substitutions to a sentence.
    """

    def __init__(self, substitutions: Mapping[str, str]):
        """
        :param substitutions: mapping from a regular expression to the
        replacement for its matches
        """
        self.substitutions = substitutions

    def process_string(self, s: str):
        """
        :param s: the sentence to rewrite
        :return: the sentence after applying every substitution, in the
        iteration order of the mapping
        """
        rewritten = s
        for pattern, replacement in self.substitutions.items():
            rewritten = re.sub(pattern, replacement, rewritten)
        return rewritten


class ToLowerCase(AbstractTransform):
    """
    Converts a sentence to lower case.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to convert
        :return: the lower-cased sentence
        """
        lowered = s.lower()
        return lowered


class ToUpperCase(AbstractTransform):
github jitsi / asr-wer / jiwer / transforms.py View on Github external
def __init__(self, substitutions: Mapping[str, str]):
        # Stored as given; process_string iterates over its items on each call.
        self.substitutions = substitutions

    def process_string(self, s: str):
        """
        :param s: the sentence to rewrite
        :return: the sentence after applying every substitution, in the
        iteration order of the mapping
        """
        rewritten = s
        for pattern, replacement in self.substitutions.items():
            rewritten = re.sub(pattern, replacement, rewritten)
        return rewritten


class ToLowerCase(AbstractTransform):
    """
    Converts a sentence to lower case.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to convert
        :return: the lower-cased sentence
        """
        lowered = s.lower()
        return lowered


class ToUpperCase(AbstractTransform):
    """
    Converts a sentence to upper case.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to convert
        :return: the upper-cased sentence
        """
        uppered = s.upper()
        return uppered


class RemoveKaldiNonWords(AbstractTransform):
    """
    Removes bracketed tokens from a sentence: any text which opens with `<`
    or `[` and runs up to the next `>` or `]`.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to clean
        :return: the sentence with every bracketed token deleted
        """
        bracketed_token = r"[<\[][^>\]]*[>\]]"
        return re.sub(bracketed_token, "", s)
github jitsi / asr-wer / jiwer / transforms.py View on Github external
return s


class SubstituteRegexes(AbstractTransform):
    """
    Applies a series of regular expression substitutions to a sentence.
    """

    def __init__(self, substitutions: Mapping[str, str]):
        """
        :param substitutions: mapping from a regular expression to the
        replacement for its matches
        """
        self.substitutions = substitutions

    def process_string(self, s: str):
        """
        :param s: the sentence to rewrite
        :return: the sentence after applying every substitution, in the
        iteration order of the mapping
        """
        rewritten = s
        for pattern, replacement in self.substitutions.items():
            rewritten = re.sub(pattern, replacement, rewritten)
        return rewritten


class ToLowerCase(AbstractTransform):
    """
    Converts a sentence to lower case.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to convert
        :return: the lower-cased sentence
        """
        lowered = s.lower()
        return lowered


class ToUpperCase(AbstractTransform):
    """
    Converts a sentence to upper case.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to convert
        :return: the upper-cased sentence
        """
        uppered = s.upper()
        return uppered


class RemoveKaldiNonWords(AbstractTransform):
    """
    Removes bracketed tokens from a sentence: any text which opens with `<`
    or `[` and runs up to the next `>` or `]`.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to clean
        :return: the sentence with every bracketed token deleted
        """
        bracketed_token = r"[<\[][^>\]]*[>\]]"
        return re.sub(bracketed_token, "", s)
github jitsi / asr-wer / jiwer / transforms.py View on Github external
if replace_by_space:
            replace_token = " "
        else:
            replace_token = ""

        super().__init__(characters, replace_token=replace_token)


class RemovePunctuation(BaseRemoveTransform):
    """
    Removes every character listed in `string.punctuation` from a sentence.
    """

    def __init__(self):
        # One token per punctuation character; the base class default
        # replacement is used.
        super().__init__(list(string.punctuation))


class RemoveMultipleSpaces(AbstractTransform):
    """
    Collapses every run of two or more whitespace characters into a single
    space.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to normalize
        :return: the sentence with each whitespace run replaced by one space
        """
        whitespace_run = r"\s\s+"
        return re.sub(whitespace_run, " ", s)

    def process_list(self, inp: List[str]):
        """
        :param inp: the sentences to normalize
        :return: a new list with `process_string` applied to each sentence
        """
        return list(map(self.process_string, inp))


class Strip(AbstractTransform):
    """
    Removes leading and trailing whitespace from a sentence.
    """

    def process_string(self, s: str):
        """
        :param s: the sentence to trim
        :return: the sentence without surrounding whitespace
        """
        trimmed = s.strip()
        return trimmed


class RemoveEmptyStrings(AbstractTransform):
    def process_string(self, s: str):
        return s.strip()