@inproceedings{hong-etal-2025-migrate,
title = "{MIGRATE}: Cross-Lingual Adaptation of Domain-Specific {LLM}s through Code-Switching and Embedding Transfer",
author = "Hong, Seongtae and
Lee, Seungyoon and
Moon, Hyeonseok and
Lim, Heuiseok",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.617/",
pages = "9184--9193",
abstract = "Large Language Models (LLMs) have rapidly advanced, with domain-specific expert models emerging to handle specialized tasks across various fields. However, the predominant focus on English-centric models demands extensive data, making it challenging to develop comparable models for mid- and low-resource languages. To address this limitation, we introduce Migrate, a novel method that leverages open-source static embedding models and up to 3 million tokens of code-switching data to facilitate the seamless transfer of embeddings to target languages. Migrate enables effective cross-lingual adaptation without requiring large-scale domain-specific corpora in the target language, promoting the accessibility of expert LLMs to a diverse range of linguistic communities. Our experimental results demonstrate that Migrate significantly enhances model performance in target languages, outperforming baseline and existing cross-lingual transfer methods. This approach provides a practical and efficient solution for extending the capabilities of domain-specific expert models."
}
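The entry above names the method's two components, code-switching data and embedding transfer, without reproducing the procedure itself. As a rough illustration of what dictionary-based code-switching data construction can look like, here is a minimal Python sketch; the generation scheme, the dictionary, and all names are assumptions, not the paper's actual pipeline.

```python
import random

def code_switch(tokens, bilingual_dict, switch_prob=0.3, seed=0):
    """Replace some source-language words with target-language translations.

    Hypothetical sketch: the paper only states that up to 3 million tokens
    of code-switching data are used, not how they are produced.
    """
    rng = random.Random(seed)
    switched = []
    for tok in tokens:
        candidates = bilingual_dict.get(tok.lower())
        if candidates and rng.random() < switch_prob:
            switched.append(rng.choice(candidates))  # swap in a target-language word
        else:
            switched.append(tok)                     # keep the source word
    return switched

# Toy English-to-Korean dictionary, purely illustrative.
toy_dict = {"doctor": ["의사"], "patient": ["환자"], "diagnosis": ["진단"]}
print(" ".join(code_switch("the doctor confirmed the diagnosis".split(), toy_dict)))
```

The abstract also describes transferring embeddings from open-source static embedding models to the target language. One common way to realize such a transfer, offered only as a guess at the spirit of the method, is to fit a linear map from the static-embedding space into the LLM's embedding space on the shared source vocabulary, then project target-language static vectors through it:

```python
import numpy as np

def transfer_embeddings(src_emb, src_static, tgt_static):
    """Initialize target-language LLM embeddings from static word vectors.

    src_emb:    (n_src, d_model) source LLM input-embedding rows
    src_static: (n_src, d_static) static vectors for the same source tokens
    tgt_static: (n_tgt, d_static) static vectors for the target vocabulary
    """
    # Least-squares fit of W so that src_static @ W approximates src_emb.
    W, *_ = np.linalg.lstsq(src_static, src_emb, rcond=None)
    # Project target-language static vectors into the LLM embedding space.
    return tgt_static @ W

rng = np.random.default_rng(0)
src_emb = rng.normal(size=(5000, 768))     # stand-in for a source embedding table
src_static = rng.normal(size=(5000, 300))  # e.g., static vectors aligned row-by-row
tgt_static = rng.normal(size=(8000, 300))  # target-language static vectors
print(transfer_embeddings(src_emb, src_static, tgt_static).shape)  # (8000, 768)
```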
Markdown (Informal)
[MIGRATE: Cross-Lingual Adaptation of Domain-Specific LLMs through Code-Switching and Embedding Transfer](https://aclanthology.org/2025.coling-main.617/) (Hong et al., COLING 2025)
ACL
Seongtae Hong, Seungyoon Lee, Hyeonseok Moon, and Heuiseok Lim. 2025. MIGRATE: Cross-Lingual Adaptation of Domain-Specific LLMs through Code-Switching and Embedding Transfer. In Proceedings of the 31st International Conference on Computational Linguistics, pages 9184–9193, Abu Dhabi, UAE. Association for Computational Linguistics.