@inproceedings{trainin-abend-2025-t5score,
title = "$T^5Score$: A Methodology for Automatically Assessing the Quality of {LLM} Generated Multi-Document Topic Sets",
author = "Trainin, Itamar and
Abend, Omri",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/display_plenaries/2025.findings-acl.1351/",
pages = "26347--26375",
ISBN = "979-8-89176-256-5",
abstract = "Using LLMs for Multi-Document Topic Extraction has recently gained popularity due to their apparent high-quality outputs, expressiveness, and ease of use. However, most existing evaluation practices are not designed for LLM-generated topics and result in low inter-annotator agreement scores, hindering the reliable use of LLMs for the task. To address this, we introduce $T^5Score$, an evaluation methodology that decomposes the quality of a topic set into quantifiable aspects, measurable through easy-to-perform annotation tasks. This framing enables a convenient, manual or automatic, evaluation procedure resulting in a strong inter-annotator agreement score.To substantiate our methodology and claims, we perform extensive experimentation on multiple datasets and report the results."
}