@inproceedings{yamada-ri-2024-leia,
title = "{LEIA}: Facilitating Cross-lingual Knowledge Transfer in Language Models with Entity-based Data Augmentation",
author = "Yamada, Ikuya and
Ri, Ryokan",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.findings-acl.419/",
doi = "10.18653/v1/2024.findings-acl.419",
pages = "7029--7039",
abstract = "Adapting English-based large language models (LLMs) to other languages has become increasingly popular due to the efficiency and potential of cross-lingual transfer. However, existing language adaptation methods often overlook the benefits of cross-lingual supervision. In this study, we introduce LEIA, a language adaptation tuning method that utilizes Wikipedia entity names aligned across languages. This method involves augmenting the target language corpus with English entity names and training the model using left-to-right language modeling. We assess LEIA on diverse question answering datasets using 7B-parameter LLMs, demonstrating significant performance gains across various non-English languages."
}
Markdown (Informal)
[LEIA: Facilitating Cross-lingual Knowledge Transfer in Language Models with Entity-based Data Augmentation](https://aclanthology.org/2024.findings-acl.419/) (Yamada & Ri, Findings 2024)
ACL
Ikuya Yamada and Ryokan Ri. 2024. [LEIA: Facilitating Cross-lingual Knowledge Transfer in Language Models with Entity-based Data Augmentation](https://aclanthology.org/2024.findings-acl.419/). In Findings of the Association for Computational Linguistics: ACL 2024, pages 7029–7039, Bangkok, Thailand. Association for Computational Linguistics.