@inproceedings{manlises-etal-2025-dlsu,
title = "{DLSU} at {BEA} 2025 Shared Task: Towards Establishing Baseline Models for Pedagogical Response Evaluation Tasks",
author = "Manlises, Maria Monica and
Gonzales, Mark Edward and
Lim, Lanz",
editor = {Kochmar, Ekaterina and
Alhafni, Bashar and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.bea-1.101/",
pages = "1260--1265",
ISBN = "979-8-89176-270-1",
abstract = "We present our submission for Tracks 3 (Providing Guidance), 4 (Actionability), and 5 (Tutor Identification) of the BEA 2025 Shared Task on Pedagogical Ability Assessment of AI-Powered Tutors. Our approach sought to investigate the performance of directly using sentence embeddings of tutor responses as input to downstream classifiers (that is, without employing any fine-tuning). To this end, we benchmarked two general-purpose sentence embedding models: gte-modernbert-base (GTE) and all-MiniLM-L12-v2, in combination with two downstream classifiers: XGBoost and multilayer perceptron. Feeding GTE embeddings to a multilayer perceptron achieved macro-F1 scores of 0.4776, 0.5294, and 0.6420 on the official test sets for Tracks 3, 4, and 5, respectively. While overall performance was modest, these results offer insights into the challenges of pedagogical response evaluation and establish a baseline for future improvements."
}
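
For reference, the baseline pipeline the abstract describes can be sketched as follows. This is a minimal illustration under stated assumptions, not the authors' code: the Hugging Face model ID, the MLP hyperparameters, and the toy data are assumptions; only the overall recipe (embeddings from a frozen general-purpose sentence encoder fed directly to a downstream classifier, evaluated with macro-F1) comes from the abstract.

    # Minimal sketch: frozen sentence embeddings -> downstream classifier.
    # Model ID, hyperparameters, and data below are illustrative assumptions.
    from sentence_transformers import SentenceTransformer
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import f1_score

    # Placeholder data; the shared task provides labeled tutor responses.
    train_texts = ["Tutor response A ...", "Tutor response B ..."]
    train_labels = [0, 1]
    test_texts = ["Tutor response C ...", "Tutor response D ..."]
    test_labels = [1, 0]

    # One of the two embedding models benchmarked in the paper
    # (assumed Hugging Face ID; all-MiniLM-L12-v2 would be the other option).
    encoder = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")

    # Embeddings are used directly as features; the encoder is not fine-tuned.
    X_train = encoder.encode(train_texts)
    X_test = encoder.encode(test_texts)

    # Multilayer perceptron as the downstream classifier
    # (hidden size and iteration budget are assumptions, not from the paper).
    clf = MLPClassifier(hidden_layer_sizes=(256,), max_iter=500, random_state=0)
    clf.fit(X_train, train_labels)

    preds = clf.predict(X_test)
    print("macro-F1:", f1_score(test_labels, preds, average="macro"))

The same features could be passed to XGBoost in place of the MLP, matching the paper's second classifier choice; the embedding step is unchanged either way.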