% Laeubli, Sennrich and Volk: EMNLP 2018 paper, published by the ACL.
% The url field holds the canonical ACL Anthology address (not a preview-branch link).
% In the title, {A} is braced so sentence-casing styles keep the capital after the "?".
@inproceedings{laubli-etal-2018-machine,
  title     = {Has Machine Translation Achieved Human Parity? {A} Case for Document-level Evaluation},
  author    = {L{\"a}ubli, Samuel and
               Sennrich, Rico and
               Volk, Martin},
  editor    = {Riloff, Ellen and
               Chiang, David and
               Hockenmaier, Julia and
               Tsujii, Jun{'}ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  month     = oct # "--" # nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-1512/},
  doi       = {10.18653/v1/D18-1512},
  pages     = {4791--4796},
  abstract  = {Recent research suggests that neural machine translation achieves parity with professional human translation on the WMT Chinese{--}English news translation task. We empirically test this claim with alternative evaluation protocols, contrasting the evaluation of single sentences and entire documents. In a pairwise ranking experiment, human raters assessing adequacy and fluency show a stronger preference for human over machine translation when evaluating documents as compared to isolated sentences. Our findings emphasise the need to shift towards document-level evaluation as machine translation improves to the degree that errors which are hard or impossible to spot at the sentence-level become decisive in discriminating quality of different translation outputs.},
}
Markdown (Informal)
[Has Machine Translation Achieved Human Parity? A Case for Document-level Evaluation](https://aclanthology.org/D18-1512/) (Läubli et al., EMNLP 2018)
ACL