% Shared-task system paper in the BEA 2026 workshop proceedings.
% NOTE(review): the url field points at a preview/ingest host; confirm the
% canonical address once the volume is published.
% NOTE(review): editor "Tack, Anais" may be missing a diaeresis; confirm
% against the proceedings front matter.
@inproceedings{cho-etal-2026-aida,
  title     = {{AIDA} at {BEA} 2026 Shared Task 1: A Two-Stage Framework for {L1}-Aware Vocabulary Difficulty Prediction with Representation Diversity and Residual Calibration},
  author    = {Cho, Seok Hyeon and
               Choi, JunHyeok and
               Ji, Sangeun and
               Han, Sung Won},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bann{\`o}, Stefano and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Anais and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.72/},
  pages     = {1029--1040},
  isbn      = {979-8-89176-409-5},
  abstract  = {We study vocabulary difficulty prediction for second language (L2) learners, a key component for adaptive language learning and assessment. Existing approaches often treat difficulty as an intrinsic property of words or contexts, overlooking representation-dependent variation and learner-specific factors such as L1 transfer. We participate in the BEA 2026 Shared Task Closed Track using the Spanish (L1) subset of the KVL dataset. We propose a two-stage framework that decouples representation learning from learner-aware calibration. Stage 1 constructs diverse representations using multiple pretrained encoders with varied pooling and prediction strategies, capturing complementary aspects of lexical and contextual complexity. Stage 2 models systematic residual errors with psycholinguistic and cross-lingual features, enabling explicit correction of prediction biases. Experiments show that our method outperforms strong baselines, improving RMSE (1.257 -{\ensuremath{>}} 0.976) and correlation (0.765 -{\ensuremath{>}} 0.857). These results highlight the importance of jointly modeling representation diversity and learner-specific effects. Our system ranked 3rd in the official BEA 2026 Shared Task Closed Track.},
}
Markdown (Informal)
[AIDA at BEA 2026 Shared Task 1: A Two-Stage Framework for L1-Aware Vocabulary Difficulty Prediction with Representation Diversity and Residual Calibration](https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.72/) (Cho et al., BEA 2026)
ACL