% Shared-task system description paper from the BEA 2026 workshop proceedings.
% NOTE(review): the url field points at a preview/ingest host -- confirm the
% canonical address once the volume is published.
@inproceedings{dalbo-2026-uga,
  title     = {{UGA} Threshold at {BEA} 2026 Shared Task 1: Predicting Vocabulary Acquisition Difficulty with Hand-Crafted {SLA}-Based Features},
  author    = {Dalbo, Emma},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bann{\`o}, Stefano and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Anais and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.67/},
  pages     = {992--996},
  isbn      = {979-8-89176-409-5},
  abstract  = {This paper describes a feature-based system submitted to the BEA 2026 Shared Task on Vocabulary Difficulty Prediction (closed track). The system models vocabulary difficulty for English learners using linguistically motivated features capturing frequency, cross-linguistic similarity, phonological and orthographic complexity, and semantic properties, supplemented by multilingual embeddings (reduced via PCA). Multiple regression models were evaluated using cross-validation, with final predictions generated from ensemble and single-model configurations per language. The system achieves competitive performance across all three L1 groups (German, Spanish, and Chinese), outperforming the XLM-RoBERTa baseline in seven of nine runs in terms of RMSE, with the strongest gains observed for Chinese and more modest improvements for Spanish. An ablation study further demonstrates that frequency and cross-linguistic similarity factors contribute most substantially to predictive performance, with effects varying across L1s. These findings highlight the role of interpretable linguistic features in modeling vocabulary difficulty in an L1-aware setting.},
}
Markdown (Informal)
[UGA Threshold at BEA 2026 Shared Task 1: Predicting Vocabulary Acquisition Difficulty with Hand-Crafted SLA-Based Features](https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.67/) (Dalbo, BEA 2026)
ACL