@inproceedings{khallaf-sharoff-2026-much,
    title = "How Much Noise Can {BERT} Handle? Insights from Multilingual Sentence Difficulty Detection",
    author = "Khallaf, Nouran and
      Sharoff, Serge",
    editor = "Piperidis, Stelios and
      Bel, N{\'u}ria and
      van den Heuvel, Henk and
      Ide, Nancy and
      Krek, Simon and
      Toral, Antonio",
    booktitle = "International Conference on Language Resources and Evaluation",
    month = may,
    year = "2026",
    address = "Palma de Mallorca, Spain",
    publisher = "ELRA Language Resource Association",
    url = "https://aclanthology.org/2026.lrec-main.485/",
    pages = "6132--6143",
    abstract = "Noisy training data can significantly degrade the performance of language-model-based classifiers, particularly in non-topical classification tasks. This study explores a range of denoising strategies for sentence-level difficulty detection, using training data derived from document-level difficulty annotations obtained through noisy crowdsourcing. Beyond monolingual settings, we also address cross-lingual transfer, where a multilingual language model is trained in one language and tested in another. We evaluate several noise reduction techniques, including Gaussian Mixture Models (GMM), Co-Teaching, Noise Transition Matrices, and Label Smoothing. Our results indicate that while BERT-based models exhibit inherent robustness to noise, incorporating explicit noise detection can further enhance performance. For our smaller dataset, GMM-based noise filtering proves particularly effective in improving prediction quality by raising the AUC score from 0.52 to 0.86, or to 0.92 when two de-noising methods are combined (GMM and Co-Teaching). However, for our larger dataset, the intrinsic regularisation of pre-trained language models provides a strong baseline, with denoising methods yielding only marginal gains (from 0.8948 to 0.8984, or to 0.9061 when two denoising methods are combined). Nonetheless, removing noisy sentences (about 20{\%} of the dataset) helps in producing a cleaner corpus with fewer infelicities. As a result we have released the largest available multilingual corpus for sentence difficulty prediction."
}
Markdown (Informal)
[How Much Noise Can BERT Handle? Insights from Multilingual Sentence Difficulty Detection](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.485/) (Khallaf & Sharoff, LREC 2026)
ACL