% Akter, Sarkar, and Karmaker Santu (EMNLP 2023): benchmark dataset of Bangla word analogies.
% The url field holds the permanent ACL Anthology landing page for this paper ID,
% not a preview/staging build link; the doi field is the preferred stable identifier.
@inproceedings{akter-etal-2023-evaluation,
  title     = {On Evaluation of {Bangla} Word Analogies},
  author    = {Akter, Mousumi and
               Sarkar, Souvika and
               Karmaker Santu, Shubhra Kanti},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-main.811/},
  doi       = {10.18653/v1/2023.emnlp-main.811},
  pages     = {13121--13127},
  abstract  = {This paper presents a benchmark dataset of Bangla word analogies for evaluating the quality of existing Bangla word embeddings. Despite being the 7th largest spoken language in the world, Bangla is still a low-resource language and popular NLP models often struggle to perform well on Bangla data sets. Therefore, developing a robust evaluation set is crucial for benchmarking and guiding future research on improving Bangla word embeddings, which is currently missing. To address this issue, we introduce a new evaluation set of 16,678 unique word analogies in Bangla as well as a translated and curated version of the original Mikolov dataset (10,594 samples) in Bangla. Our experiments with different state-of-the-art embedding models reveal that current Bangla word embeddings struggle to achieve high accuracy on both data sets, demonstrating a significant gap in multilingual NLP research.},
}
Markdown (Informal)
[On Evaluation of Bangla Word Analogies](https://aclanthology.org/2023.emnlp-main.811/) (Akter et al., EMNLP 2023)
ACL
- Mousumi Akter, Souvika Sarkar, and Shubhra Kanti Karmaker Santu. 2023. On Evaluation of Bangla Word Analogies. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 13121–13127, Singapore. Association for Computational Linguistics.