% Workshop paper in the proceedings of the 20th Linguistic Annotation Workshop (LAW XX), 2026.
% NOTE(review): the url field points at a preview/ingest host; confirm the permanent
% address before citing. The address field holds the location given by the exporter.
@inproceedings{zhong-etal-2026-revisiting,
  author    = {Zhong, Yang and
               Liu, Yang Janet and
               Litman, Diane},
  title     = {Revisiting Faithfulness Annotations for Long-form Summaries},
  editor    = {Liu, Yang Janet and
               Gessler, Luke},
  booktitle = {Proceedings of the 20th Linguistic Annotation Workshop ({LAW} {XX})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {158--172},
  isbn      = {979-8-89176-404-0},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.law-main.12/},
  abstract  = {Benchmarks for long-form summaries (four or more sentences) generated by language models increasingly serve as gold-standard references for developing, evaluating, and comparing faithfulness-checking systems. As their influence grows, understanding the challenges of annotating faithfulness errors within long, discourse-rich summaries becomes critical. We revisit three benchmarks spanning diverse text types and contrasting annotation designs. Using a discourse-aware evaluation framework together with human auditing, we identify cases where benchmark labels may be unreliable. Manual verification shows that 3.4{\%}--5.4{\%} of sentence-level labels warrant revision due to discourse-level inconsistencies that standard annotation procedures overlook. We introduce a taxonomy of five recurring annotation error types, propose revised labels, and show that correcting these cases leads to meaningful shifts in system rankings. We conclude with recommendations for future annotation practices.},
}

Markdown (Informal)
[Revisiting Faithfulness Annotations for Long-form Summaries](https://preview.aclanthology.org/ingest-acl-workshops/2026.law-main.12/) (Zhong et al., LAW 2026)
ACL