@inproceedings{taffa-etal-2024-low,
title = "Low Resource Question Answering: An {A}mharic Benchmarking Dataset",
author = "Taffa, Tilahun Abedissa and
Usbeck, Ricardo and
Assabie, Yaregal",
editor = "Mabuya, Rooweither and
Matfunjwa, Muzi and
Setaka, Mmasibidi and
van Zaanen, Menno",
booktitle = "Proceedings of the Fifth Workshop on Resources for African Indigenous Languages @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2024.rail-1.14/",
pages = "124--132",
    abstract = "Question Answering (QA) systems return concise answers or answer lists from natural language text, given a context document. Many resources go into curating QA datasets to advance the development of robust QA models. There is a surge in QA datasets for languages such as English; this is not the case for low-resource languages like Amharic. Indeed, there is no published or publicly available Amharic QA dataset. Hence, to foster further research in low-resource QA, we present the first publicly available benchmarking Amharic Question Answering Dataset (Amh-QuAD). We crowdsource 2,628 question-answer pairs from over 378 Amharic Wikipedia articles. Using the training set, we fine-tune an XLM-R-based language model and introduce a new reader model. Leveraging our newly fine-tuned reader, we run a baseline model to spark open-domain Amharic QA research interest. The best-performing baseline QA achieves F-scores of 80.3 and 81.34 in the retriever-reader and reading comprehension settings, respectively."
}
Markdown (Informal)
[Low Resource Question Answering: An Amharic Benchmarking Dataset](https://preview.aclanthology.org/add-emnlp-2024-awards/2024.rail-1.14/) (Taffa et al., RAIL 2024)