% BEA 2026 workshop paper, ACL Anthology ID 2026.bea-1.31.
% The url field uses the canonical Anthology address instead of the
% preview/ingest staging host, which is not a permanent location.
% NOTE(review): editor "Tack, Anais" may be missing a diaeresis on the "i" --
% confirm against the proceedings front matter before changing it.
@inproceedings{zhou-etal-2026-comparative,
  author    = {Zhou, Yiyun and
               O{'}Donnell, Francis and
               Yaneva, Victoria},
  title     = {Comparative Evaluation of {AI}-Generated vs. Expert-written Answer Explanations for a Medical Education Self-Assessment},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bann{\`o}, Stefano and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Anais and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {455--462},
  isbn      = {979-8-89176-409-5},
  url       = {https://aclanthology.org/2026.bea-1.31/},
  abstract  = {Answer explanations for medical multiple-choice questions (MCQs) are a valuable learning tool, but producing them is resource intensive. Writing high quality explanations requires specialized medical expertise and careful alignment with the keyed answer, distractors, and the clinical vignette. This paper evaluates whether a template-aware, retrieval-guided large language model (LLM) workflow can support this production task in a real formative assessment setting. Using a 50-item medical education self-assessment, we compared AI-generated and expert-written MCQ explanations in a blinded study involving eight medical faculty and sixteen medical students. Each participant rated 25 of 50 paired explanations on clarity, amount of information, and structure. The clearest empirical difference was in amount of information: AI-generated explanations were rated significantly higher than expert-written explanations in a cumulative link mixed model analysis (OR = 1.99, 95{\%} CI [1.33, 2.99], p = 0.001). Ratings of clarity and structure did not differ significantly between conditions. Based on faculty ratings, a smaller proportion of AI-generated explanations were judged to require correction (20{\%}) compared with expert-written explanations (38{\%}). These findings suggest that AI can reduce first-draft authoring effort in explanation writing while still requiring expert review to ensure content accuracy.},
}

Markdown (Informal)
[Comparative Evaluation of AI-Generated vs. Expert-written Answer Explanations for a Medical Education Self-Assessment](https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.31/) (Zhou et al., BEA 2026)
ACL