We argue that the machine translation community is overly reliant on the Bleu machine translation evaluation metric. We show that an improved Bleu score is neither necessary nor sufficient for achieving an actual improvement in translation quality, and give two significant counterexamples to Bleu’s correlation with human judgments of quality. This offers new potential for research which was previously deemed unpromising by an inability to improve upon Bleu scores.
%0 Conference Paper
%1 callisonburch2006rrb
%A Callison-Burch, Chris
%A Osborne, Miles
%A Koehn, Philipp
%B Proceedings of EACL
%D 2006
%K SMT bleu evaluation
%P 249--256
%T Re-evaluating the role of BLEU in machine translation research
%X We argue that the machine translation community is overly reliant on the Bleu machine translation evaluation metric. We show that an improved Bleu score is neither necessary nor sufficient for achieving an actual improvement in translation quality, and give two significant counterexamples to Bleu’s correlation with human judgments of quality. This offers new potential for research which was previously deemed unpromising by an inability to improve upon Bleu scores.
@inproceedings{callisonburch2006rrb,
  author    = {Callison-Burch, Chris and Osborne, Miles and Koehn, Philipp},
  title     = {Re-evaluating the Role of {BLEU} in Machine Translation Research},
  booktitle = {Proceedings of {EACL}},
  pages     = {249--256},
  year      = {2006},
  keywords  = {SMT bleu evaluation},
  abstract  = {We argue that the machine translation community is overly reliant on the Bleu machine translation evaluation metric. We show that an improved Bleu score is neither necessary nor sufficient for achieving an actual improvement in translation quality, and give two significant counterexamples to Bleu's correlation with human judgments of quality. This offers new potential for research which was previously deemed unpromising by an inability to improve upon Bleu scores.},
  added-at  = {2009-09-11T18:43:14.000+0200},
  timestamp = {2009-09-11T18:43:36.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/26f048fa7a2da9286969a0895e8e0f424/jimregan},
  interhash = {c97f2d393b375b219d56a2f0dab0e53e},
  intrahash = {6f048fa7a2da9286969a0895e8e0f424},
}