How to use the DEFAULT_TOKENIZER constant in sacrebleu

To help you get started, we’ve selected a few sacrebleu examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chakki-works / sumeval / sumeval / metrics / bleu.py View on Github external
def __init__(self,
                 smooth_method="floor", smooth_value=0.01,
                 lowercase=False, use_effective_order=True,
                 lang="en"):
        """Store the BLEU settings and select the tokenizer for ``lang``.

        Args:
            smooth_method: Stored as ``self.smooth_method``. Presumably the
                name of a sacrebleu smoothing method; the consumer is not
                visible in this block.
            smooth_value: Stored as ``self.smooth_value``.
            lowercase: Stored as ``self.lowercase``.
            use_effective_order: Stored as ``self.use_effective_order``.
            lang: Either a language code string, which is resolved through
                ``get_lang``, or a ``BaseLang`` instance, whose ``lang``
                attribute supplies the code.

        Raises:
            TypeError: If ``lang`` is neither a ``str`` nor a ``BaseLang``.
        """
        self.smooth_method = smooth_method
        self.smooth_value = smooth_value
        self.lowercase = lowercase
        self.use_effective_order = use_effective_order
        if isinstance(lang, str):
            self.lang = lang
            self._lang = get_lang(lang)
        elif isinstance(lang, BaseLang):
            self.lang = lang.lang
            self._lang = lang
        else:
            # Without this branch self.lang was never assigned and the
            # comparison below failed with an unrelated AttributeError.
            raise TypeError(
                "lang must be a language code (str) or a BaseLang instance, "
                "got {}".format(type(lang).__name__))

        # Start from the library default; "ja" and "zh" override it below.
        self._tokenizer = DEFAULT_TOKENIZER
        if self.lang == "ja":
            def tokenizer_ja(text):
                # Tokenize with this instance's language helper and hand
                # back a single space-separated string.
                words = self._lang.tokenize_with_preprocess(text)
                return " ".join(words)

            # NOTE(review): TOKENIZERS is shared outside this instance and
            # the closure captures ``self``, so each new "ja" instance
            # replaces the entry registered by the previous one.
            TOKENIZERS["ja"] = tokenizer_ja
            self._tokenizer = "ja"
        elif self.lang == "zh":
            self._tokenizer = "zh"
github yanchaoni / translate_machine_translation / tools / bleu_calculation.py View on Github external
def __init__(self,
                 smooth="floor",
                 smooth_floor=0.01,
                 lowercase=False,
                 use_effective_order=True,
                 tokenizer=DEFAULT_TOKENIZER):
        """Record the BLEU configuration on the instance.

        Every argument is stored unchanged under an attribute of the same
        name; nothing is validated or derived here.

        Args:
            smooth: Stored as ``self.smooth``.
            smooth_floor: Stored as ``self.smooth_floor``.
            lowercase: Stored as ``self.lowercase``.
            use_effective_order: Stored as ``self.use_effective_order``.
            tokenizer: Stored as ``self.tokenizer``; defaults to
                ``DEFAULT_TOKENIZER``.
        """
        # Paired assignments bind left to right, so the attributes are
        # created in the same order as the parameters are declared.
        self.smooth, self.smooth_floor = smooth, smooth_floor
        self.lowercase, self.use_effective_order = (
            lowercase, use_effective_order)
        self.tokenizer = tokenizer