@inproceedings{merad-etal-2025-language,
title = "Language ver{Y} Rare for All",
author = "Merad, Ibrahim and
Wolf, Amos and
Mazzawi, Ziad and
L{\'e}o, Yannick",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2025.loreslm-1.12/",
pages = "166--174",
abstract = "In the quest to overcome language barriers, encoder-decoder models like NLLB have expanded machine translation to rare languages, with some models (e.g., NLLB 1.3B) even trainable on a single GPU. While general-purpose LLMs perform well in translation, open LLMs prove highly competitive when fine-tuned for specific tasks involving unknown corpora. We introduce LYRA (Language verY Rare for All), a novel approach that combines open LLM fine-tuning, retrieval-augmented generation (RAG), and transfer learning from related high-resource languages. This study is exclusively focused on single-GPU training to facilitate ease of adoption. Our study focuses on two-way translation between French and Mon{\'e}gasque {---} a rare language unsupported by existing translation tools due to limited corpus availability. Our results demonstrate LYRA`s effectiveness, frequently surpassing and consistently matching state-of-the-art encoder-decoder models in rare language translation."
}
Markdown (Informal)
[Language verY Rare for All](https://preview.aclanthology.org/add-emnlp-2024-awards/2025.loreslm-1.12/) (Merad et al., LoResLM 2025)

ACL
Ibrahim Merad, Amos Wolf, Ziad Mazzawi, and Yannick Léo. 2025. Language verY Rare for All. In Proceedings of the First Workshop on Language Models for Low-Resource Languages, pages 166–174, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.