% ReproHum reproduction report for the MemSum paper, published in the GEM workshop proceedings (2025).
@inproceedings{junker-2025-reprohum,
  title     = {{ReproHum} {\#}0729-04: Human Evaluation Reproduction Report for ``{MemSum}: Extractive Summarization of Long Documents Using Multi-Step Episodic {Markov} Decision Processes''},
  author    = {Junker, Simeon},
  editor    = {Dhole, Kaustubh and Clinciu, Miruna},
  booktitle = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria and virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.gem-1.50/},
  pages     = {561--567},
  isbn      = {979-8-89176-261-9},
  abstract  = {Human evaluation is indispensable in natural language processing (NLP), as automatic metrics are known to not always align well with human judgments. However, the reproducibility of human evaluations can be problematic since results are susceptible to many factors, the details of which are often missing from the respective works. As part of the ReproHum project, this work aims to reproduce the human evaluation of a single criterion in the paper ``MemSum: Extractive Summarization of Long Documents Using Multi-Step Episodic Markov Decision Processes'' (Gu et al., 2022). The results of our reproduction differ noticeably from those of the original study. To explain this discrepancy, we discuss differences in the experimental setup, as well as more general characteristics of the selected domain and the generated summaries.},
}
Markdown (Informal)
[ReproHum #0729-04: Human Evaluation Reproduction Report for “MemSum: Extractive Summarization of Long Documents Using Multi-Step Episodic Markov Decision Processes”](https://preview.aclanthology.org/corrections-2025-08/2025.gem-1.50/) (Junker, GEM 2025)
ACL