% EMNLP 2023 paper introducing DUMB, the Dutch Model Benchmark.
% The url field uses the permanent ACL Anthology address (not a preview-branch link);
% the doi field identifies the same paper.
@inproceedings{de-vries-etal-2023-dumb,
  title     = {{DUMB}: A Benchmark for Smart Evaluation of {Dutch} Models},
  author    = {de Vries, Wietse and
               Wieling, Martijn and
               Nissim, Malvina},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-main.447/},
  doi       = {10.18653/v1/2023.emnlp-main.447},
  pages     = {7221--7241},
  abstract  = {We introduce the Dutch Model Benchmark: DUMB. The benchmark includes a diverse set of datasets for low-, medium- and high-resource tasks. The total set of nine tasks includes four tasks that were previously not available in Dutch. Instead of relying on a mean score across tasks, we propose Relative Error Reduction (RER), which compares the DUMB performance of language models to a strong baseline which can be referred to in the future even when assessing different sets of language models. Through a comparison of 14 pre-trained language models (mono- and multi-lingual, of varying sizes), we assess the internal consistency of the benchmark tasks, as well as the factors that likely enable high performance. Our results indicate that current Dutch monolingual models under-perform and suggest training larger Dutch models with other architectures and pre-training objectives. At present, the highest performance is achieved by DeBERTaV3 (large), XLM-R (large) and mDeBERTaV3 (base). In addition to highlighting best strategies for training larger Dutch models, DUMB will foster further research on Dutch. A public leaderboard is available at https://dumbench.nl.},
}
Markdown (Informal)
[DUMB: A Benchmark for Smart Evaluation of Dutch Models](https://aclanthology.org/2023.emnlp-main.447/) (de Vries et al., EMNLP 2023)
ACL
- Wietse de Vries, Martijn Wieling, and Malvina Nissim. 2023. DUMB: A Benchmark for Smart Evaluation of Dutch Models. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 7221–7241, Singapore. Association for Computational Linguistics.