% Guillou & Hardmeier, EMNLP 2018 -- ACL Anthology record D18-1513.
% The url field uses the canonical aclanthology.org address (not a preview-branch
% mirror); the month range is joined with an en dash ("--").
@inproceedings{guillou-hardmeier-2018-automatic,
    title     = "Automatic Reference-Based Evaluation of Pronoun Translation Misses the Point",
    author    = "Guillou, Liane and
                 Hardmeier, Christian",
    editor    = "Riloff, Ellen and
                 Chiang, David and
                 Hockenmaier, Julia and
                 Tsujii, Jun{'}ichi",
    booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
    month     = oct # "--" # nov,
    year      = "2018",
    address   = "Brussels, Belgium",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/D18-1513/",
    doi       = "10.18653/v1/D18-1513",
    pages     = "4797--4802",
    abstract  = "We compare the performance of the APT and AutoPRF metrics for pronoun translation against a manually annotated dataset comprising human judgements as to the correctness of translations of the PROTEST test suite. Although there is some correlation with the human judgements, a range of issues limit the performance of the automated metrics. Instead, we recommend the use of semi-automatic metrics and test suites in place of fully automatic metrics."
}
Markdown (Informal)
[Automatic Reference-Based Evaluation of Pronoun Translation Misses the Point](https://preview.aclanthology.org/fix-sig-urls/D18-1513/) (Guillou & Hardmeier, EMNLP 2018)
ACL