How to use the sacrebleu.TOKENIZERS registry (a mapping from tokenizer names to tokenizer functions) in sacrebleu

To help you get started, we’ve selected a few sacrebleu examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ufal / neuralmonkey / neuralmonkey / evaluators / sacrebleu.py View on Github external
def __init__(self,
                 name: str,
                 smooth_method: str = "exp",
                 smooth_value: float = 0.0,
                 force: bool = False,
                 lowercase: bool = False,
                 tokenize: str = "none",
                 use_effective_order: bool = False) -> None:
        """Validate the BLEU options and store them on the evaluator.

        Args:
            name: Evaluator name, passed on to the parent constructor.
            smooth_method: Smoothing method name; must be a member of
                ``SMOOTH_VARIANTS``.
            smooth_value: Stored as ``self.smooth_value`` (presumably the
                parameter of the smoothing method -- confirm against
                sacrebleu).
            force: Stored as ``self.force``.
            lowercase: Stored as ``self.lowercase``.
            tokenize: Tokenizer name; must be a member of ``TOKENIZERS``.
            use_effective_order: Stored as ``self.use_effective_order``.

        Raises:
            ValueError: If ``tokenize`` is not in ``TOKENIZERS`` or
                ``smooth_method`` is not in ``SMOOTH_VARIANTS``.
        """
        # Runtime check of the annotated argument types; the helper is
        # defined outside this snippet (presumably typeguard's).
        check_argument_types()
        super().__init__(name)

        if tokenize not in TOKENIZERS:
            # Report only the registered names: TOKENIZERS maps names to
            # callables, so formatting the container itself would dump
            # function reprs (with memory addresses) into the message.
            raise ValueError(
                "Unknown tokenizer '{}'. You must use one of sacrebleu's "
                "tokenizers: {}".format(tokenize, list(TOKENIZERS)))

        if smooth_method not in SMOOTH_VARIANTS:
            raise ValueError(
                "Unknown smoothing '{}'. You must use one of sacrebleu's "
                "smoothing methods: {}".format(smooth_method,
                                               str(SMOOTH_VARIANTS)))

        self.smooth_method = smooth_method
        self.smooth_value = smooth_value
        self.force = force
        self.lowercase = lowercase
        self.tokenize = tokenize
        self.use_effective_order = use_effective_order
github chakki-works / sumeval / sumeval / metrics / bleu.py View on Github external
self.lowercase = lowercase
        self.use_effective_order = use_effective_order
        if isinstance(lang, str):
            self.lang = lang
            self._lang = get_lang(lang)
        elif isinstance(lang, BaseLang):
            self.lang = lang.lang
            self._lang = lang

        self._tokenizer = DEFAULT_TOKENIZER
        if self.lang == "ja":
            def tokenizer_ja(text):
                words = self._lang.tokenize_with_preprocess(text)
                return " ".join(words)

            TOKENIZERS["ja"] = tokenizer_ja
            self._tokenizer = "ja"
        elif self.lang == "zh":
            self._tokenizer = "zh"
github ufal / neuralmonkey / neuralmonkey / evaluators / sacrebleu.py View on Github external
def __init__(self,
                 name: str,
                 smooth_method: str = "exp",
                 smooth_value: float = 0.0,
                 force: bool = False,
                 lowercase: bool = False,
                 tokenize: str = "none",
                 use_effective_order: bool = False) -> None:
        """Validate the BLEU options and store them on the evaluator.

        Args:
            name: Evaluator name, passed on to the parent constructor.
            smooth_method: Smoothing method name; must be a member of
                ``SMOOTH_VARIANTS``.
            smooth_value: Stored as ``self.smooth_value`` (presumably the
                parameter of the smoothing method -- confirm against
                sacrebleu).
            force: Stored as ``self.force``.
            lowercase: Stored as ``self.lowercase``.
            tokenize: Tokenizer name; must be a member of ``TOKENIZERS``.
            use_effective_order: Stored as ``self.use_effective_order``.

        Raises:
            ValueError: If ``tokenize`` is not in ``TOKENIZERS`` or
                ``smooth_method`` is not in ``SMOOTH_VARIANTS``.
        """
        # Runtime check of the annotated argument types; the helper is
        # defined outside this snippet (presumably typeguard's).
        check_argument_types()
        super().__init__(name)

        if tokenize not in TOKENIZERS:
            # Report only the registered names: TOKENIZERS maps names to
            # callables, so formatting the container itself would dump
            # function reprs (with memory addresses) into the message.
            raise ValueError(
                "Unknown tokenizer '{}'. You must use one of sacrebleu's "
                "tokenizers: {}".format(tokenize, list(TOKENIZERS)))

        if smooth_method not in SMOOTH_VARIANTS:
            raise ValueError(
                "Unknown smoothing '{}'. You must use one of sacrebleu's "
                "smoothing methods: {}".format(smooth_method,
                                               str(SMOOTH_VARIANTS)))

        self.smooth_method = smooth_method
        self.smooth_value = smooth_value
        self.force = force
        self.lowercase = lowercase
        self.tokenize = tokenize
        self.use_effective_order = use_effective_order