% QCET demo paper, INLG 2024 System Demonstrations (ACL Anthology ID 2024.inlg-demos.4).
% The url field is the canonical aclanthology.org landing page rather than a
% preview.aclanthology.org branch build, so the link does not depend on a preview branch staying online.
@inproceedings{belz-etal-2024-qcet-interactive,
  title     = {{QCET}: An Interactive Taxonomy of Quality Criteria for Comparable and Repeatable Evaluation of {NLP} Systems},
  author    = {Belz, Anya and
               Mille, Simon and
               Thomson, Craig and
               Huidrom, Rudali},
  editor    = {Mahamood, Saad and
               Minh, Nguyen Le and
               Ippolito, Daphne},
  booktitle = {Proceedings of the 17th International Natural Language Generation Conference: System Demonstrations},
  month     = sep,
  year      = {2024},
  address   = {Tokyo, Japan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.inlg-demos.4/},
  pages     = {9--12},
  abstract  = {Four years on from two papers (Belz et al., 2020; Howcroft et al., 2020) that first called out the lack of standardisation and comparability in the quality criteria assessed in NLP system evaluations, researchers still use widely differing quality criteria names and definitions, meaning that it continues to be unclear when the same aspect of quality is being assessed in two evaluations. While normalised quality criteria were proposed at the time, the list was unwieldy and using it came with a steep learning curve. In this demo paper, our aim is to address these issues with an interactive taxonomy tool that enables quick perusal and selection of the quality criteria, and provides decision support and examples of use at each node.},
}
Markdown (Informal)
[QCET: An Interactive Taxonomy of Quality Criteria for Comparable and Repeatable Evaluation of NLP Systems](https://aclanthology.org/2024.inlg-demos.4/) (Belz et al., INLG 2024)
ACL