% Shared-task system paper in the BEA 2026 workshop proceedings.
% NOTE(review): `url` points at a preview/ingest host; confirm the canonical URL before release.
@inproceedings{parashar-mathias-2026-token,
  title     = {Token Titans at {BEA} 2026 Shared Task 1: Multilingual Lexical Complexity Prediction via Fine-Tuned {XLM-RoBERTa} with Ensemble Decoding},
  author    = {Parashar, Anubhab and
               Mathias, Sandeep},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bann{\`o}, Stefano and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Anais and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.80/},
  pages     = {1119--1123},
  isbn      = {979-8-89176-409-5},
  abstract  = {We describe our submission to the BEA 2026 Shared Task on Multilingual Lexical Complexity Prediction. The system fine-tunes XLM-RoBERTa Large separately for Spanish, German, and Chinese, feeding each instance as a flat concatenation of the source word, its sentential context, an English clue, and the English target word. Training uses z-score label normalization and two independent runs that differ in learning rate, scheduler, and random seed; a weighted ensemble of their predictions (0.6/0.4) consistently reduces variance on the validation set. On the official test set the system scores RMSE = 1.170 and Pearson = 0.812.},
}
Markdown (Informal)
[Token Titans at BEA 2026 Shared Task 1: Multilingual Lexical Complexity Prediction via Fine-Tuned XLM-RoBERTa with Ensemble Decoding](https://preview.aclanthology.org/ingest-acl-workshops/2026.bea-1.80/) (Parashar & Mathias, BEA 2026)
ACL