@inproceedings{pit-2025-henry,
    title     = {Henry at {BEA} 2025 Shared Task: Improving {AI} Tutor{'}s Guidance Evaluation Through Context-Aware Distillation},
    author    = {Pit, Henry},
    editor    = {Kochmar, Ekaterina and
                 Alhafni, Bashar and
                 Bexte, Marie and
                 Burstein, Jill and
                 Horbach, Andrea and
                 Laarmann-Quante, Ronja and
                 Tack, Ana{\"i}s and
                 Yaneva, Victoria and
                 Yuan, Zheng},
    booktitle = {Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.bea-1.91/},
    pages     = {1164--1172},
    isbn      = {979-8-89176-270-1},
    abstract  = {Effective AI tutoring hinges on guiding learners with the right balance of support. In this work, we introduce CODE (COntextually-aware Distilled Evaluator), a framework that harnesses advanced large language models (i.e., GPT-4o and Claude-2.7) to generate synthetic, context-aware justifications for human-annotated tutor responses in the BEA 2025 Shared Task. By distilling these justifications into a smaller open-source model (i.e., Phi-3.5-mini-instruct) via initial supervised fine-tuning and then Group Relative Policy Optimization, we achieve substantial gains in label prediction over direct prompting of proprietary LLMs. Our experiments show that CODE reliably identifies strong positive and negative guidance, but like prior work, struggles to distinguish nuanced ``middle-ground'' cases where partial hints blur with vagueness. We argue that overcoming this limitation will require the development of explicit, feature-based evaluation metrics that systematically map latent pedagogical qualities to model outputs, enabling more transparent and robust assessment of AI-driven tutoring.}
}
Markdown (Informal)
[Henry at BEA 2025 Shared Task: Improving AI Tutor’s Guidance Evaluation Through Context-Aware Distillation](https://aclanthology.org/2025.bea-1.91/) (Pit, BEA 2025)
ACL