@inproceedings{braun-2025-reprohum,
title = "{R}epro{H}um {\#}0031-01: Reproducing the Human Evaluation of Readability from ``It is {AI}{'}s Turn to Ask Humans a Question''",
author = "Braun, Daniel",
editor = "Dhole, Kaustubh and
Clinciu, Miruna",
booktitle = "Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})",
month = jul,
year = "2025",
address = "Vienna, Austria and virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/transition-to-people-yaml/2025.gem-1.52/",
pages = "576--582",
ISBN = "979-8-89176-261-9",
abstract = "The reproducibility of results is the foundation on which scientific credibility is built. In Natural Language Processing (NLP) research, human evaluation is often seen as the gold standard of evaluation. This paper presents the reproduction of a human evaluation of a Natural Language Generation (NLG) system that generates pairs of questions and answers based on children{'}s stories that was originally conducted by Yao et al. (2022). Specifically, it replicates the evaluation of readability, one of the most commonly evaluated criteria for NLG systems. The results of the reproduction are aligned with the original findings and all major claims of the original paper are confirmed."
}