% RETUYT-INCO system-description paper for the BEA 2026 shared task (workshop proceedings entry).
% NOTE(review): the url field points at a preview.aclanthology.org ingest path; confirm the
% canonical address once the volume is published.
@inproceedings{robaina-etal-2026-retuyt,
  title     = {{RETUYT}-{INCO} at {BEA} 2026 Shared Task 1: Feature-Enriched {mDeBERTa} for Word Difficulty Prediction},
  author    = {Robaina, Santiago and
               Ros{\'a}, Aiala and
               Chiruzzo, Luis},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bann{\`o}, Stefano and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Anais and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.79/},
  pages     = {1113--1118},
  isbn      = {979-8-89176-409-5},
  abstract  = {We describe the RETUYT-INCO participation in the BEA 2026 Shared Task on Vocabulary Difficulty Prediction for English Learners, a regression task that predicts GLMM psychometric difficulty scores for English target words given an L1 cue (Spanish, German, and Mandarin). We submitted two systems to the closed track (which restricts participants to the provided shared-task data and standard NLP resources, excluding external corpora and large language models): a feature-engineered XGBoost regressor for all three L1s, and, for Spanish, a 3-seed ensemble of mdeberta-v3-base fine-tuned with the same handcrafted features prepended as input text tokens. Our best test result is 1.094 RMSE on Spanish (ensemble), a 13.0{\%} reduction over the XLM-RoBERTa-base closed baseline. We highlight two findings. First, a LaBSE cross-lingual cosine between the L1 source word and the English target word is the largest single-feature addition in our incremental ablation, reducing average development-split (dev) RMSE by 0.091 on top of an already strong string/frequency/POS feature set. Second, feature-only XGBoost, with no neural fine-tuning and no GPU, already beats the XLM-RoBERTa-base closed-track development baseline on average across the three L1s (1.273 vs. 1.287 RMSE).},
}

Markdown (Informal)
[RETUYT-INCO at BEA 2026 Shared Task 1: Feature-Enriched mDeBERTa for Word Difficulty Prediction](https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.79/) (Robaina et al., BEA 2026)
ACL