reduced_predictions = []
for chunk in chunked_predictions:
    torch.cuda.synchronize()
    reduced_predictions += distributed_utils.all_gather_list(chunk, max_size=65000)
torch.cuda.synchronize()
with open(os.path.join(args.data, 'sacrebleu_reference.de'), 'r') as reference:
    refs = [reference.readlines()]
# reducing indexed predictions as strings is more memory efficient than reducing tuples
predictions = [item for sublist in reduced_predictions for item in sublist]
predictions = [tuple(item.split('\t')) for item in predictions]
predictions = [(int(item[0]), item[1]) for item in predictions]
predictions.sort(key=lambda tup: tup[0])
predictions = [hypo[1] + ('\n' if hypo[1][-1] != '\n' else '') for hypo in predictions]
sacrebleu_score = sacrebleu.corpus_bleu(predictions, refs, lowercase=args.ignore_case)
print(f'| Detokenized {sacrebleu_score}')
if gen_timer.sum != 0:
    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, num_sentences / gen_timer.sum, 1. / gen_timer.avg))
if has_target:
    print('| Generate {} with beam={}: {}'.format(subset, args.beam, scorer.result_string()))
print('| Eval completed in: {:.2f}s'.format(time.time() - begin))
return scorer.score(order=4), sacrebleu_score.score
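
The comment above refers to gathering hypotheses across workers as "index\thypothesis" strings and restoring corpus order before scoring. A minimal, self-contained sketch of that reorder-and-score step; the gathered_chunks data is made up to stand in for the output of all_gather_list, and only sacrebleu.corpus_bleu is the real API:

import sacrebleu

# Pretend output of all_gather_list: each worker contributed "index\thypothesis" strings.
gathered_chunks = [
    ["1\tDas ist gut.", "0\tHallo Welt."],  # worker 0
    ["2\tEin Test."],                        # worker 1
]
flat = [item for chunk in gathered_chunks for item in chunk]
pairs = [(int(idx), hypo) for idx, hypo in (item.split('\t', 1) for item in flat)]
pairs.sort(key=lambda tup: tup[0])   # restore original corpus order
hypotheses = [hypo for _, hypo in pairs]

references = [["Hallo Welt.", "Das ist gut.", "Ein Test."]]
print(sacrebleu.corpus_bleu(hypotheses, references).score)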
def score_corpus_multiprocess(
        self, hypothesis: List[str], references: List[List[str]]
) -> float:
    tokenizer = get_optional_dict(self.extra_args, 'bp_tokenizer', 'none')
    if self.n_workers == 1:
        corpus_score = sb.corpus_bleu(
            hypothesis, references, force=True, tokenize=tokenizer
        ).bp
    else:
        batches = list(
            self._batch(hypothesis, references, n_batches=self.n_workers)
        )
        ref_len, sys_len = 0, 0
        correct = [0 for _ in range(sb.NGRAM_ORDER)]
        total = [0 for _ in range(sb.NGRAM_ORDER)]
        with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
            futures = [
                executor.submit(
                    sb.corpus_bleu, b[0], b[1], force=True,
                    tokenize=tokenizer
                )
                for b in batches
            ]
            # Aggregate per-batch sufficient statistics (see the full BLEU
            # variant below) and take the brevity penalty of the result.
            for future in as_completed(futures):
                s = future.result()
                ref_len += s.ref_len
                sys_len += s.sys_len
                for n in range(sb.NGRAM_ORDER):
                    correct[n] += s.counts[n]
                    total[n] += s.totals[n]
            corpus_score = sb.compute_bleu(
                correct, total, sys_len, ref_len, smooth_method='exp'
            ).bp
    return corpus_score
def score_corpus_multiprocess(
        self, hypothesis: List[str], references: List[List[str]]
) -> float:
    tokenizer = get_optional_dict(self.extra_args, 'bleu_tokenizer', 'none')
    if self.n_workers == 1:
        corpus_score = sb.corpus_bleu(
            hypothesis, references, force=True, tokenize=tokenizer
        ).score
    else:
        batches = list(
            self._batch(hypothesis, references, n_batches=self.n_workers)
        )
        ref_len, sys_len = 0, 0
        correct = [0 for _ in range(sb.NGRAM_ORDER)]
        total = [0 for _ in range(sb.NGRAM_ORDER)]
        with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
            futures = [
                executor.submit(
                    sb.corpus_bleu, b[0], b[1], force=True,
                    tokenize=tokenizer
                )
                for b in batches
            ]
            progress = as_completed(futures)
            if self.verbose:
                progress = tqdm(progress)
            for future in progress:
                s = future.result()
                ref_len += s.ref_len
                sys_len += s.sys_len
                for n in range(sb.NGRAM_ORDER):
                    correct[n] += s.counts[n]
                    total[n] += s.totals[n]
            corpus_score = sb.compute_bleu(
                correct, total, sys_len, ref_len, smooth_method='exp'
            ).score
    return corpus_score
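
Both scorers above rely on a _batch helper on the class that splits the hypotheses and the parallel reference streams into n_batches pieces; that helper is not shown here. A minimal sketch of one possible implementation (the name, signature, and slicing strategy are assumptions, not the library's actual code):

from typing import Iterator, List, Tuple

def _batch(hypothesis: List[str], references: List[List[str]],
           n_batches: int) -> Iterator[Tuple[List[str], List[List[str]]]]:
    # Ceiling division so every sentence lands in exactly one batch.
    batch_size = max(1, (len(hypothesis) + n_batches - 1) // n_batches)
    for start in range(0, len(hypothesis), batch_size):
        end = start + batch_size
        # Slice each reference stream in parallel with the hypotheses.
        yield hypothesis[start:end], [ref[start:end] for ref in references]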
with MosesDetokenizer(args.language) as detokenize:
    item = detokenize(item.split(" "))
    thefile.write("%s\n" % item)
if args.eval:
    inputfh = open(save_path, 'r')
    system = inputfh.readlines()
    inputref = open(args.target_translation, 'r')
    ref = inputref.readlines()
    print(str(args.id) + " " + str(sacrebleu.corpus_bleu(system, [ref]).score) + " " + save_path)
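
The snippet above detokenizes Moses-tokenized output before handing it to sacrebleu. A rough, self-contained sketch of the same detokenize-then-score flow, using the sacremoses package instead of the mosestokenizer context manager; sentences and language are made up for illustration:

import sacrebleu
from sacremoses import MosesDetokenizer

detok = MosesDetokenizer(lang='de')
tokenized_hyps = ["Das ist ein Test .", "Hallo Welt !"]
hypotheses = [detok.detokenize(h.split(" ")) for h in tokenized_hyps]

references = [["Das ist ein Test.", "Hallo Welt!"]]
print(sacrebleu.corpus_bleu(hypotheses, references).score)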
def sentence_bleu(hypothesis, reference):
    bleu = _corpus_bleu(hypothesis, reference)
    # add-one smoothing of the 2- to 4-gram counts so a single sentence with no
    # higher-order matches still gets a defined precision
    for i in range(1, 4):
        bleu.counts[i] += 1
        bleu.totals[i] += 1
    bleu = compute_bleu(
        bleu.counts, bleu.totals,
        bleu.sys_len, bleu.ref_len,
        smooth='exp', smooth_floor=0.0,
    )
    return bleu.score
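
The function above applies add-one smoothing to the 2- to 4-gram statistics of a single sentence pair before recomputing BLEU. A self-contained sketch of the same idea using sacrebleu's module-level API directly, assuming the same sacrebleu 1.x attributes and compute_bleu signature as the multiprocess scorer above; the example sentences are made up, and the repo-specific _corpus_bleu/compute_bleu helpers are not used:

import sacrebleu

hyp = "the cat sat on the mat"
ref = "the cat was sitting on the mat"

stats = sacrebleu.corpus_bleu([hyp], [[ref]])
# Add one to the 2-, 3- and 4-gram counts and totals, as sentence_bleu does above.
counts = [c + (1 if order > 0 else 0) for order, c in enumerate(stats.counts)]
totals = [t + (1 if order > 0 else 0) for order, t in enumerate(stats.totals)]
smoothed = sacrebleu.compute_bleu(counts, totals, stats.sys_len, stats.ref_len,
                                  smooth_method='exp')
print(smoothed.score)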
def __call__(self, loss, output, target):
    """Computes the BLEU score of a translation task

    Args:
        loss (:obj:`torch.Tensor`): Not used
        output (:obj:`torch.Tensor`): Translated output (not tokenized)
        target (:obj:`torch.Tensor`): Target labels

    Returns:
        float: BLEU score
    """
    return torch.tensor(
        [
            sacrebleu.corpus_bleu(
                output, [target], tokenize="intl", lowercase=True
            ).score
        ]
    )
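
A hypothetical call to the metric above. The enclosing class is not shown, so the BleuMetric name is an assumption; output and target are passed as lists of detokenized strings, which is what sacrebleu.corpus_bleu expects:

metric = BleuMetric()                    # assumed name of the enclosing class
hyps = ["the cat sat on the mat"]
refs = ["the cat was sitting on the mat"]
score = metric(None, hyps, refs)         # loss argument is ignored
print(score.item())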
def corpus_bleu(sys_sents: List[str],
                refs_sents: List[List[str]],
                smooth_method: str = 'exp',
                smooth_value: float = None,
                force: bool = True,
                lowercase: bool = False,
                tokenizer: str = '13a',
                use_effective_order: bool = False):
    sys_sents = [utils_prep.normalize(sent, lowercase, tokenizer) for sent in sys_sents]
    refs_sents = [[utils_prep.normalize(sent, lowercase, tokenizer) for sent in ref_sents]
                  for ref_sents in refs_sents]
    return sacrebleu.corpus_bleu(sys_sents, refs_sents, smooth_method, smooth_value, force,
                                 lowercase=False, tokenize='none', use_effective_order=use_effective_order).score
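
The wrapper above normalizes and tokenizes the text itself and then disables sacrebleu's internal tokenizer with tokenize='none'. A minimal sketch of that pattern using only sacrebleu, with simple lowercasing standing in for the utils_prep.normalize step (which is repo-specific and not shown here):

import sacrebleu

sys_sents = ["The cat sat on the mat ."]
refs_sents = [["The cat was sitting on the mat ."]]

sys_norm = [s.lower() for s in sys_sents]
refs_norm = [[s.lower() for s in ref] for ref in refs_sents]
print(sacrebleu.corpus_bleu(sys_norm, refs_norm, tokenize='none', force=True).score)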
def score(fdsys):
    with open(args.ref) as fdref:
        print(sacrebleu.corpus_bleu(fdsys, [fdref]))
elif args.sentence_bleu: