@inproceedings{castilho-2021-towards,
  title     = {Towards Document-Level Human {MT} Evaluation: On the Issues of Annotator Agreement, Effort and Misevaluation},
  author    = {Castilho, Sheila},
  editor    = {Belz, Anya and
               Agarwal, Shubham and
               Graham, Yvette and
               Reiter, Ehud and
               Shimorina, Anastasia},
  booktitle = {Proceedings of the Workshop on Human Evaluation of {NLP} Systems ({HumEval})},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.humeval-1.4/},
  pages     = {34--45},
  abstract  = {Document-level human evaluation of machine translation (MT) has been raising interest in the community. However, little is known about the issues of using document-level methodologies to assess MT quality. In this article, we compare the inter-annotator agreement (IAA) scores, the effort to assess the quality in different document-level methodologies, and the issue of misevaluation when sentences are evaluated out of context.},
}
Markdown (Informal)
[Towards Document-Level Human MT Evaluation: On the Issues of Annotator Agreement, Effort and Misevaluation](https://aclanthology.org/2021.humeval-1.4/) (Castilho, HumEval 2021)
ACL