@inproceedings{tiwari-rastogi-2025-phaedrus,
title = "Phaedrus at {BEA} 2025 Shared Task: Assessment of Mathematical Tutoring Dialogues through Tutor Identity Classification and Actionability Evaluation",
author = "Tiwari, Rajneesh and
Rastogi, Pranshu",
editor = {Kochmar, Ekaterina and
Alhafni, Bashar and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.bea-1.85/",
pages = "1098--1107",
ISBN = "979-8-89176-270-1",
abstract = "As Large Language Models (LLMs) are increasingly deployed in educational environments, two critical challenges emerge: identifying the source of tutoring responses and evaluating their pedagogical effectiveness. This paper presents our comprehensive approach to the BEA 2025 Shared Task, addressing both tutor identity classification (Track 5) and actionability assessment (Track 4) in mathematical tutoring dialogues. For tutor identity classification, we distinguish between human tutors (expert/novice) and seven distinct LLMs using cross-response context augmentation and ensemble techniques. For actionability assessment, we evaluate whether responses provide clear guidance on student next steps using selective attention masking and instruction-guided training. Our multi-task approach combines transformer-based models with innovative contextual feature engineering, achieving state-of-the-art performance with a CV macro F1 score of 0.9596 (test set 0.9698) for identity classification and 0.655 (test set Strict F1 0.6906) for actionability assessment. We were able to score rank 5th in Track 4 and rank 1st in Track 5. Our analysis reveals that despite advances in human-like responses, LLMs maintain detectable fingerprints while showing varying levels of pedagogical actionability, with important implications for educational technology development and deployment."
}
Markdown (Informal)
[Phaedrus at BEA 2025 Shared Task: Assessment of Mathematical Tutoring Dialogues through Tutor Identity Classification and Actionability Evaluation](https://preview.aclanthology.org/landing_page/2025.bea-1.85/) (Tiwari & Rastogi, BEA 2025)
ACL