@inproceedings{dekmak-etal-2025-tutormind,
title = "{T}utor{M}ind at {BEA} 2025 Shared Task: Leveraging Fine-Tuned {LLM}s and Data Augmentation for Mistake Identification",
author = "Dekmak, Fatima and
Khairallah, Christian and
Antoun, Wissam",
editor = {Kochmar, Ekaterina and
Alhafni, Bashar and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bea-1.96/",
pages = "1203--1211",
ISBN = "979-8-89176-270-1",
abstract = "In light of the growing adoption of large language models (LLMs) as educational tutors, it is crucial to effectively evaluate their pedagogical capabilities across multiple dimensions. Toward this goal, we address the Mistake Identification sub-task of the BEA 2025 Shared task, aiming to assess the accuracy of tutors in detecting and identifying student errors. We experiment with several LLMs, including GPT-4o-mini, Mistral-7B, and Llama-3.1-8B, evaluating them in both zero-shot and fine-tuned settings. To address class imbalance, we augment the training data with synthetic examples, targeting underrepresented labels, generated by Command R+. Our GPT-4o model finetuned on the full development set achieves a strict macro-averaged F1 score of 71.63{\%}, ranking second in the shared task. Our work highlights the effectiveness of fine-tuning on task-specific data and suggests that targeted data augmentation can further support LLM performance on nuanced pedagogical evaluation tasks."
}