% BEA 2025 shared-task system paper (ACL Anthology ID 2025.bea-1.92).
% The url field uses the permanent Anthology address, not a preview-site build.
@inproceedings{gombert-etal-2025-tba,
  title     = {{TBA} at {BEA} 2025 Shared Task: Transfer-Learning from {DARE}-{TIES} Merged Models for the Pedagogical Ability Assessment of {LLM}-Powered Math Tutors},
  author    = {Gombert, Sebastian and
               Zehner, Fabian and
               Drachsler, Hendrik},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Ana{\"i}s and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 20th Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.bea-1.92/},
  pages     = {1173--1179},
  isbn      = {979-8-89176-270-1},
  abstract  = {This paper presents our contribution to the BEA 2025 Shared Task on Pedagogical Ability Assessment of AI-Powered Tutors. The objective of this shared task was to assess the quality of conversational feedback provided by LLM-based math tutors to students regarding four facets: whether the tutors 1) identified mistakes, 2) identified the mistake{'}s location, 3) provided guidance, and whether they 4) provided actionable feedback. To leverage information across all four labels, we approached the problem with FLAN-T5 models, which we fit for this task using a multi-step pipeline involving regular fine-tuning as well as model merging using the DARE-TIES algorithm. We can demonstrate that our pipeline is beneficial to overall model performance compared to regular fine-tuning. With results on the test set ranging from 52.1 to 68.6 in F1 scores and 62.2{\%} to 87.4{\%} in accuracy, our best models placed 11th of 44 teams in Track 1, 8th of 31 teams in Track 2, 11th of 35 teams in Track 3, and 9th of 30 teams in Track 4. Notably, the classifiers' recall was relatively poor for underrepresented classes, indicating even greater potential for the employed methodology.},
}
Markdown (Informal)
[TBA at BEA 2025 Shared Task: Transfer-Learning from DARE-TIES Merged Models for the Pedagogical Ability Assessment of LLM-Powered Math Tutors](https://aclanthology.org/2025.bea-1.92/) (Gombert et al., BEA 2025)
ACL