@inproceedings{calo-etal-2025-lessons,
title = "Lessons from a User Experience Evaluation of {NLP} Interfaces",
author = "Cal{\`o}, Eduardo and
Penkert, Lydia and
Mahamood, Saad",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2025.findings-naacl.159/",
pages = "2915--2929",
ISBN = "979-8-89176-195-7",
abstract = "Human evaluations lay at the heart of evaluations within the field of Natural Language Processing (NLP). Seen as the ``golden standard'' of evaluations, questions are being asked on whether these evaluations are both reproducible and repeatable. One overlooked aspect is the design choices made by researchers when designing user interfaces (UIs). In this paper, four UIs used in past NLP human evaluations are assessed by UX experts, based on standardized human-centered interaction principles. Building on these insights, we derive several recommendations that the NLP community should apply when designing UIs, to enable more consistent human evaluation responses."
}