@inproceedings{el-mekki-abdul-mageed-2025-effective,
title = "Effective Self-Mining of In-Context Examples for Unsupervised Machine Translation with {LLM}s",
author = "El Mekki, Abdellah and
Abdul-Mageed, Muhammad",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-naacl.238/",
pages = "4229--4256",
ISBN = "979-8-89176-195-7",
    abstract = "Large Language Models (LLMs) have demonstrated impressive performance on a wide range of natural language processing (NLP) tasks, primarily through in-context learning (ICL). In ICL, the LLM is provided with examples that represent a given task so that it learns to generate answers for test inputs. However, access to these in-context examples is not guaranteed, especially for low-resource or massively multilingual tasks. In this work, we propose an unsupervised approach to mine in-context examples for machine translation (MT), enabling unsupervised MT (UMT) across different languages. Our approach begins with word-level mining to acquire word translations that are then used to perform sentence-level mining. As the quality of mined parallel pairs may not be optimal due to noise or mistakes, we introduce a filtering criterion to select the optimal in-context examples from a pool of unsupervised parallel sentences. We evaluate our approach using two multilingual LLMs on 288 directions from the FLORES-200 dataset and analyze the impact of various linguistic features on performance. Our findings demonstrate the effectiveness of our unsupervised approach in mining in-context examples for MT, leading to translation performance that is better than or comparable to translation with regular in-context examples (extracted from human-annotated data), while also outperforming other state-of-the-art UMT methods by an average of 7 BLEU points."
}
Markdown (Informal)
[Effective Self-Mining of In-Context Examples for Unsupervised Machine Translation with LLMs](https://aclanthology.org/2025.findings-naacl.238/) (El Mekki & Abdul-Mageed, Findings 2025)