% COLING 2025 paper, ACL Anthology ID 2025.coling-main.588.
% The editor "Di Eugenio" has a compound surname; it is written in
% "Last, First" form so the particle stays part of the family name.
@inproceedings{kamoda-etal-2025-quantifying,
  title     = {Quantifying the Influence of Evaluation Aspects on Long-Form Response Assessment},
  author    = {Kamoda, Go and
               Asai, Akari and
               Brassard, Ana and
               Sakaguchi, Keisuke},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.588/},
  pages     = {8787--8808},
  abstract  = {Evaluating the outputs of large language models (LLMs) on long-form generative tasks remains challenging. While fine-grained, aspect-wise evaluations provide valuable diagnostic information, they are difficult to design exhaustively, and each aspect{'}s contribution to the overall acceptability of an answer is unclear. In this study, we propose a method to compute an overall quality score as a weighted average of three key aspects: factuality, informativeness, and formality. This approach achieves stronger correlations with human judgments compared to previous metrics. Our analysis identifies factuality as the most predictive aspect of overall quality. Additionally, we release a dataset of 1.2k long-form QA answers annotated with both absolute judgments and relative preferences in overall and aspect-wise schemes to aid future research in evaluation practices.},
}
Markdown (Informal)
[Quantifying the Influence of Evaluation Aspects on Long-Form Response Assessment](https://aclanthology.org/2025.coling-main.588/) (Kamoda et al., COLING 2025)
ACL