% Shared-task system paper (Hossain & Alve) in the BEA 2026 workshop proceedings.
% NOTE(review): the url field points at a preview/ingest path; confirm the
% final public URL before citing.
% NOTE(review): editor "Tack, Anais" may be missing the diaeresis on the i;
% verify against the published proceedings.
@inproceedings{hossain-alve-2026-failure,
    title = "Failure at {BEA} 2026 Shared Task 1: One Pipeline, Three {L1s}: A Unified Language-Agnostic System for Vocabulary Difficulty Prediction",
    author = "Hossain, Abid and
      Alve, Kamruzzaman Khan",
    editor = "Kochmar, Ekaterina and
      Alhafni, Bashar and
      Bann{\`o}, Stefano and
      Bexte, Marie and
      Burstein, Jill and
      Horbach, Andrea and
      Laarmann-Quante, Ronja and
      Tack, Anais and
      Yaneva, Victoria and
      Yuan, Zheng",
    booktitle = "Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.73/",
    pages = "1041--1046",
    ISBN = "979-8-89176-409-5",
    abstract = "We present a unified, language-agnostic system for the BEA 2026 Shared Task on vocabulary difficulty prediction. The system uses a single training pipeline across Spanish, German, and Mandarin Chinese without any language-specific adaptation. Input features include serialized text fields and four scalar length-based features, processed using an XLM-RoBERTa encoder with attention-mask-weighted mean pooling. Hyperparameters are tuned with Optuna under reduced cross-validation, followed by full 5-fold training and checkpoint-based ensembling. Our approach improves over the official closed-track baseline across all three L1 conditions, demonstrating that a shared architecture and training strategy can yield consistent gains without language-specific engineering. Error analysis shows higher prediction error at difficulty extremes, suggesting a regression-to-the-mean tendency."
}
Markdown (Informal)
[Failure at BEA 2026 Shared Task 1: One Pipeline, Three L1s: A Unified Language-Agnostic System for Vocabulary Difficulty Prediction](https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.73/) (Hossain & Alve, BEA 2026)
ACL