@inproceedings{watson-gkatzia-2024-reprohum,
title = "{R}epro{H}um {\#}0712-01: Reproducing Human Evaluation of Meaning Preservation in Paraphrase Generation",
author = "Watson, Lewis N. and
Gkatzia, Dimitra",
editor = "Balloccu, Simone and
Belz, Anya and
Huidrom, Rudali and
Reiter, Ehud and
Sedoc, Joao and
Thomson, Craig",
booktitle = "Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.humeval-1.19/",
pages = "221--228",
abstract = "Reproducibility is a cornerstone of scientific research, ensuring the reliability and generalisability of findings. The ReproNLP Shared Task on Reproducibility of Evaluations in NLP aims to assess the reproducibility of human evaluation studies. This paper presents a reproduction study of the human evaluation experiment presented in ``Hierarchical Sketch Induction for Paraphrase Generation'' by Hosking et al. (2022). The original study employed a human evaluation on Amazon Mechanical Turk, assessing the quality of paraphrases generated by their proposed model using three criteria: meaning preservation, fluency, and dissimilarity. In our reproduction study, we focus on the meaning preservation criterion and utilise the Prolific platform for participant recruitment, following the ReproNLP challenge{'}s common approach to reproduction. We discuss the methodology, results, and implications of our reproduction study, comparing them to the original findings. Our findings contribute to the understanding of reproducibility in NLP research and highlights the potential impact of platform changes and evaluation criteria on the reproducibility of human evaluation studies."
}