@inproceedings{yang-etal-2024-multiple,
title = "Multiple Sources are Better Than One: Incorporating External Knowledge in Low-Resource Glossing",
author = "Yang, Changbing and
Nicolai, Garrett and
Silfverberg, Miikka",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2024.emnlp-main.261/",
doi = "10.18653/v1/2024.emnlp-main.261",
pages = "4537--4552",
abstract = "In this paper, we address the data scarcity problem in automatic data-driven glossing for low-resource languages by coordinating multiple sources of linguistic expertise. We enhance models by incorporating both token-level and sentence-level translations, utilizing the extensive linguistic capabilities of modern LLMs, and incorporating available dictionary resources. Our enhancements lead to an average absolute improvement of 5{\%}-points in word-level accuracy over the previous state of the art on a typologically diverse dataset spanning six low-resource languages. The improvements are particularly noticeable for the lowest-resourced language Gitksan, where we achieve a 10{\%}-point improvement. Furthermore, in a simulated ultra-low resource setting for the same six languages, training on fewer than 100 glossed sentences, we establish an average 10{\%}-point improvement in word-level accuracy over the previous state-of-the-art system."
}
Markdown (Informal)
[Multiple Sources are Better Than One: Incorporating External Knowledge in Low-Resource Glossing](https://aclanthology.org/2024.emnlp-main.261/) (Yang et al., EMNLP 2024)
ACL
Changbing Yang, Garrett Nicolai, and Miikka Silfverberg. 2024. Multiple Sources are Better Than One: Incorporating External Knowledge in Low-Resource Glossing. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 4537–4552, Miami, Florida, USA. Association for Computational Linguistics.