How to use the sacrebleu.corpus_bleu function in sacrebleu

To help you get started, we've selected a few examples that show how sacrebleu.corpus_bleu is used in popular public projects.
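
Before working through the project excerpts, here is a minimal sketch of the basic call pattern (the hypothesis and reference strings below are made up for illustration): corpus_bleu takes a list of detokenized hypothesis strings plus a list of reference streams, where each stream is itself a list of strings parallel to the hypotheses, and it returns an object whose score attribute holds the corpus-level BLEU.

import sacrebleu

# One hypothesis string per segment (illustrative data only).
hypotheses = ["the cat sat on the mat", "it is raining today"]

# References are a list of reference streams; each stream is a list of strings
# aligned with the hypotheses. Two streams means two references per segment.
references = [
    ["the cat sat on the mat", "it rains today"],
    ["a cat sat on the mat", "today it is raining"],
]

bleu = sacrebleu.corpus_bleu(hypotheses, references)
print(bleu.score)  # corpus-level BLEU as a float
print(bleu)        # full result string (format varies by sacrebleu version)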

github NVIDIA / DeepLearningExamples / PyTorch / Translation / Transformer / train.py
        # Gather each worker's prediction chunks onto every rank.
        reduced_predictions = []
        for chunk in chunked_predictions:
            torch.cuda.synchronize()
            reduced_predictions += distributed_utils.all_gather_list(chunk, max_size=65000)
            torch.cuda.synchronize()

    with open(os.path.join(args.data, 'sacrebleu_reference.de'), 'r') as reference:
        refs = [reference.readlines()]
    # Reducing indexed predictions as strings is more memory-efficient than reducing tuples.
    predictions = [item for sublist in reduced_predictions for item in sublist]
    predictions = [tuple(item.split('\t')) for item in predictions]
    predictions = [(int(item[0]), item[1]) for item in predictions]
    predictions.sort(key=lambda tup: tup[0])
    predictions = [hypo[1] + ('\n' if hypo[1][-1] != '\n' else '') for hypo in predictions]
    sacrebleu_score = sacrebleu.corpus_bleu(predictions, refs, lowercase=args.ignore_case)
    print(f'|Detokenized {sacrebleu_score}')
    if gen_timer.sum != 0:
        print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
            num_sentences, gen_timer.n, gen_timer.sum, num_sentences / gen_timer.sum, 1./gen_timer.avg))
    if has_target:
        print('| Generate {} with beam={}: {}'.format(subset, args.beam, scorer.result_string()))

    print('| Eval completed in: {:.2f}s'.format(time.time()-begin))

    return scorer.score(order=4), sacrebleu_score.score
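
Stripped of the distributed bookkeeping, the scoring step above reduces to: collect detokenized hypothesis strings, wrap the reference file's lines in a single-element list (one reference stream), and read the numeric value from the returned object's score attribute. A minimal sketch of just that part, with hypothetical file names standing in for the paths built from args.data:

import sacrebleu

# Hypothetical file names; the script above reads 'sacrebleu_reference.de' from args.data.
with open('hypotheses.detok.txt') as f:
    hypotheses = [line.rstrip('\n') for line in f]
with open('reference.de') as f:
    refs = [f.readlines()]  # a single reference stream, as in the excerpt above

result = sacrebleu.corpus_bleu(hypotheses, refs, lowercase=True)  # lowercase plays the role of args.ignore_case
print(result)        # full BLEU report line
print(result.score)  # numeric score used for logging and the return value
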
github facebookresearch / vizseq / vizseq / scorers / bp.py
    def score_corpus_multiprocess(
            self, hypothesis: List[str], references: List[List[str]]
    ) -> float:
        tokenizer = get_optional_dict(self.extra_args, 'bp_tokenizer', 'none')
        if self.n_workers == 1:
            corpus_score = sb.corpus_bleu(
                hypothesis, references, force=True, tokenize=tokenizer
            ).bp
        else:
            batches = list(
                self._batch(hypothesis, references, n_batches=self.n_workers)
            )
            ref_len, sys_len = 0, 0
            correct = [0 for _ in range(sb.NGRAM_ORDER)]
            total = [0 for _ in range(sb.NGRAM_ORDER)]
            with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
                futures = [
                    executor.submit(
                        sb.corpus_bleu, b[0], b[1], force=True,
                        tokenize=tokenizer
                    )
                    for b in batches
                ]
                # The remainder mirrors the bleu.py scorer shown below: sum the
                # per-batch statistics, recombine them with sb.compute_bleu, and
                # keep only the brevity penalty.
                progress = as_completed(futures)
                if self.verbose:
                    progress = tqdm(progress)
                for future in progress:
                    s = future.result()
                    ref_len += s.ref_len
                    sys_len += s.sys_len
                    for n in range(sb.NGRAM_ORDER):
                        correct[n] += s.counts[n]
                        total[n] += s.totals[n]
                corpus_score = sb.compute_bleu(
                    correct, total, sys_len, ref_len, smooth_method='exp'
                ).bp
        return corpus_score
github facebookresearch / vizseq / vizseq / scorers / bleu.py
    def score_corpus_multiprocess(
            self, hypothesis: List[str], references: List[List[str]]
    ) -> float:
        tokenizer = get_optional_dict(self.extra_args, 'bleu_tokenizer', 'none')
        if self.n_workers == 1:
            corpus_score = sb.corpus_bleu(
                hypothesis, references, force=True, tokenize=tokenizer
            ).score
        else:
            batches = list(
                self._batch(hypothesis, references, n_batches=self.n_workers)
            )
            ref_len, sys_len = 0, 0
            correct = [0 for _ in range(sb.NGRAM_ORDER)]
            total = [0 for _ in range(sb.NGRAM_ORDER)]
            with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
                futures = [
                    executor.submit(
                        sb.corpus_bleu, b[0], b[1], force=True,
                        tokenize=tokenizer
                    )
                    for b in batches
                ]
                progress = as_completed(futures)
                if self.verbose:
                    progress = tqdm(progress)
                for future in progress:
                    s = future.result()
                    ref_len += s.ref_len
                    sys_len += s.sys_len
                    for n in range(sb.NGRAM_ORDER):
                        correct[n] += s.counts[n]
                        total[n] += s.totals[n]
                corpus_score = sb.compute_bleu(
                    correct, total, sys_len, ref_len, smooth_method='exp'
                ).score
        return corpus_score
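
The multiprocessing branch works because the object returned by corpus_bleu carries its sufficient statistics (counts, totals, sys_len, ref_len), which can be summed across batches and turned back into a score with compute_bleu. A minimal two-batch sketch of the same idea with made-up sentences; the smooth_method keyword matches the snippet above, while older sacrebleu releases spell it smooth (as in the espresso example further down):

import sacrebleu as sb

sys_a, refs_a = ["the cat sat on the mat"], [["the cat sat on the mat"]]
sys_b, refs_b = ["it is raining today"], [["today it is raining"]]

part_a = sb.corpus_bleu(sys_a, refs_a)
part_b = sb.corpus_bleu(sys_b, refs_b)

# Sum the n-gram statistics and lengths, then recompute BLEU over the union.
correct = [x + y for x, y in zip(part_a.counts, part_b.counts)]
total = [x + y for x, y in zip(part_a.totals, part_b.totals)]
sys_len = part_a.sys_len + part_b.sys_len
ref_len = part_a.ref_len + part_b.ref_len

combined = sb.compute_bleu(correct, total, sys_len, ref_len, smooth_method='exp')
print(combined.score)
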
github ofirpress / YouMayNotNeedAttention / model / generate.py
        with MosesDetokenizer(args.language) as detokenize:
            item = detokenize(item.split(" "))


        thefile.write("%s\n" % item)


if args.eval:
    inputfh = open(save_path, 'r')
    system = inputfh.readlines()

    inputref = open(args.target_translation, 'r')
    ref = inputref.readlines()

    print(str(args.id) + "  "+ str(sacrebleu.corpus_bleu(system, [ref]).score) + " " + save_path)
github freewym / espresso / examples / translation_moe / score.py
def sentence_bleu(hypothesis, reference):
    bleu = _corpus_bleu(hypothesis, reference)
    for i in range(1, 4):
        bleu.counts[i] += 1
        bleu.totals[i] += 1
    bleu = compute_bleu(
        bleu.counts, bleu.totals,
        bleu.sys_len, bleu.ref_len,
        smooth='exp', smooth_floor=0.0,
    )
    return bleu.score
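
The helper above builds a sentence-level score by add-one smoothing the higher-order corpus statistics before recomputing BLEU. Depending on the sacrebleu version installed, the library also exposes a sentence_bleu helper that covers the common case directly; a minimal sketch with made-up strings (its default smoothing may differ from the add-one scheme above):

import sacrebleu

hypothesis = "the cat sat on the mat"
references = ["a cat sat on the mat"]

print(sacrebleu.sentence_bleu(hypothesis, references).score)
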
github mlbench / mlbench-core / mlbench_core / evaluation / pytorch / metrics.py
    def __call__(self, loss, output, target):
        """ Computes the BLEU score of a translation task

        Args:
            loss (:obj:`torch.Tensor`): Not Used
            output (:obj:`torch.Tensor`): Translated output (not tokenized)
            target (:obj:`torch.Tensor`): Target labels

        Returns:
            float: BLEU score
        """
        return torch.tensor(
            [
                sacrebleu.corpus_bleu(
                    output, [target], tokenize="intl", lowercase=True
                ).score
            ]
        )
github feralvam / easse / easse / bleu.py
def corpus_bleu(sys_sents: List[str],
                refs_sents: List[List[str]],
                smooth_method: str = 'exp',
                smooth_value: float = None,
                force: bool = True,
                lowercase: bool = False,
                tokenizer: str = '13a',
                use_effective_order: bool = False):

    sys_sents = [utils_prep.normalize(sent, lowercase, tokenizer) for sent in sys_sents]
    refs_sents = [[utils_prep.normalize(sent, lowercase, tokenizer) for sent in ref_sents]
                  for ref_sents in refs_sents]

    return sacrebleu.corpus_bleu(sys_sents, refs_sents, smooth_method, smooth_value, force,
                                 lowercase=False, tokenize='none', use_effective_order=use_effective_order).score
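
This wrapper normalizes and tokenizes the sentences itself, then passes tokenize='none' so sacrebleu does not re-tokenize them, and force=True to silence the warning about scoring text that already looks tokenized. A minimal sketch of that pattern without the easse helpers, using made-up pre-tokenized strings:

import sacrebleu

# Text that has already been tokenized elsewhere; sacrebleu should leave it untouched.
sys_sents = ["the cat sat on the mat ."]
refs_sents = [["the cat sat on the mat ."]]

score = sacrebleu.corpus_bleu(
    sys_sents, refs_sents,
    force=True,        # suppress the already-tokenized warning
    tokenize='none',   # skip sacrebleu's own tokenizer
    lowercase=False,
).score
print(score)
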
github freewym / espresso / score.py
    if args.sacrebleu:
        def score(fdsys):
            with open(args.ref) as fdref:
                print(sacrebleu.corpus_bleu(fdsys, [fdref]))
    elif args.sentence_bleu: