@comment{Conference paper in the LREC-COLING 2024 proceedings. The url field uses the canonical ACL Anthology address for paper 2024.lrec-main.1292 rather than a temporary preview-branch host, and case-sensitive title words are protected with whole-word braces.}
@inproceedings{brans-bloem-2024-simlex,
  author    = {Brans, Lizzy and
               Bloem, Jelke},
  title     = {{SimLex-999} for {Dutch}},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {14832--14845},
  url       = {https://aclanthology.org/2024.lrec-main.1292/},
  abstract  = {Word embeddings revolutionised natural language processing by effectively representing words as dense vectors. Although many datasets exist to evaluate English embeddings, few cater to Dutch. We developed a Dutch variant of the SimLex-999 word similarity dataset by gathering similarity judgements from 235 native Dutch speakers. Subsequently, we evaluated two popular Dutch language models, Bertje and RobBERT, finding that Bertje showed superior alignment with human semantic similarity judgments compared to RobBERT. This study provides the first intrinsic Dutch word embedding evaluation dataset, which enables accurate assessment of these embeddings and fosters the development of effective Dutch language models.},
}
Markdown (Informal)
[SimLex-999 for Dutch](https://aclanthology.org/2024.lrec-main.1292/) (Brans & Bloem, LREC-COLING 2024)
ACL
- Lizzy Brans and Jelke Bloem. 2024. SimLex-999 for Dutch. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 14832–14845, Torino, Italia. ELRA and ICCL.