@inproceedings{linqing-etal-2024-e3,
title = "E3: Optimizing Language Model Training for Translation via Enhancing Efficiency and Effectiveness",
author = "Linqing, Chen and
Weilei, Wang and
Dongyang, Hu",
editor = "Sun, Maosong and
Liang, Jiye and
Han, Xianpei and
Liu, Zhiyuan and
He, Yulan",
booktitle = "Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)",
month = jul,
year = "2024",
address = "Taiyuan, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2024.ccl-1.79/",
pages = "1023--1034",
language = "eng",
abstract = "In the field of Natural Language Processing (NLP), Large-scale Language Models (LLMs) have demonstrated exceptional capabilities across a variety of tasks, including question answering, classification, and particularly, natural language understanding. The integration of neural machine translation with LLMs presents significant potential, transforming the paradigms of cross-lingual communication and information exchange. This study investigates the foundational aspects of LLMs' translation abilities and identifies effective training methodologies to equip them with multilingual capacities. We specifically explore the optimal timing for introducing translation capabilities to LLMs via supervised tasks, considering the inherent bilingual nature of machine translation. Key questions explored include whether it is more beneficial to integrate multiple languages during the pre-training or supervised fine-tuning (SFT) stages, how variations in language ratios influence LLMs' translation abilities, and whether longer or shorter texts are more effective for training these models. This research conducts a thorough investigation by training multiple LLMs from scratch with parameter scales in the billions and enhances the robustness of our findings by upgrading the language capabilities of pre-trained open-source models with parameter scales reaching tens of billions. The aim is to provide a detailed analysis that elucidates the complexities of augmenting machine translation capabilities within LLMs."
}
Markdown (Informal)
[E3: Optimizing Language Model Training for Translation via Enhancing Efficiency and Effectiveness](https://aclanthology.org/2024.ccl-1.79/) (Chen et al., CCL 2024)
ACL
Linqing Chen, Weilei Wang, and Dongyang Hu. 2024. E3: Optimizing Language Model Training for Translation via Enhancing Efficiency and Effectiveness. In Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference), pages 1023–1034, Taiyuan, China. Chinese Information Processing Society of China.