@inproceedings{koutcheme-etal-2025-direct,
    title     = {Direct Repair Optimization: Training Small Language Models For Educational Program Repair Improves Feedback},
    author    = {Koutcheme, Charles and
                 Dainese, Nicola and
                 Hellas, Arto},
    editor    = {Kochmar, Ekaterina and
                 Alhafni, Bashar and
                 Bexte, Marie and
                 Burstein, Jill and
                 Horbach, Andrea and
                 Laarmann-Quante, Ronja and
                 Tack, Ana{\"i}s and
                 Yaneva, Victoria and
                 Yuan, Zheng},
    booktitle = {Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.bea-1.41/},
    pages     = {564--581},
    isbn      = {979-8-89176-270-1},
    abstract  = {Locally deployed Small Language Models (SLMs) offer a promising solution for providing timely and effective programming feedback to students learning to code. However, SLMs often produce misleading or hallucinated feedback, limiting their reliability in educational settings. Current approaches for improving SLM feedback rely on existing human annotations or LLM-generated feedback. This paper addresses a fundamental challenge: Can we improve SLMs' feedback capabilities without relying on human or LLM-generated annotations? We demonstrate that training SLMs on the proxy task of program repair is sufficient to enhance their ability to generate high-quality feedback. To this end, we introduce Direct Repair Optimization (DRO), a self-supervised online reinforcement learning strategy that trains language models to reason about how to efficiently fix students' programs. Our experiments, using DRO to fine-tune LLaMA-3.1{--}3B and Qwen-2.5{--}3B on a large-scale dataset of Python submissions from real students, show substantial improvements on downstream feedback tasks. We release our code to support further research in educational feedback and highlight promising directions for future work.}
}
Markdown (Informal)
[Direct Repair Optimization: Training Small Language Models For Educational Program Repair Improves Feedback](https://aclanthology.org/2025.bea-1.41/) (Koutcheme et al., BEA 2025)
ACL