@inproceedings{zhuocheng-etal-2023-scaling,
    title     = "Scaling Law for Document Neural Machine Translation",
    author    = "Zhuocheng, Zhang and
      Gu, Shuhao and
      Zhang, Min and
      Feng, Yang",
    editor    = "Bouamor, Houda and
      Pino, Juan and
      Bali, Kalika",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
    month     = dec,
    year      = "2023",
    address   = "Singapore",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2023.findings-emnlp.556/",
    doi       = "10.18653/v1/2023.findings-emnlp.556",
    pages     = "8290--8303",
    abstract  = "The scaling laws of language models have played a significant role in advancing large language models. In order to promote the development of document translation, we systematically examine the scaling laws in this field. In this paper, we carry out an in-depth analysis of the influence of three factors on translation quality: model scale, data scale, and sequence length. Our findings reveal that increasing sequence length effectively enhances model performance when model size is limited. However, sequence length cannot be infinitely extended; it must be suitably aligned with the model scale and corpus volume. Further research shows that providing adequate context can effectively enhance the translation quality of a document's initial portion. Nonetheless, exposure bias remains the primary factor hindering further improvement in translation quality for the latter half of the document."
}
Markdown (Informal)
[Scaling Law for Document Neural Machine Translation](https://aclanthology.org/2023.findings-emnlp.556/) (Zhuocheng et al., Findings 2023)
ACL
- Zhang Zhuocheng, Shuhao Gu, Min Zhang, and Yang Feng. 2023. Scaling Law for Document Neural Machine Translation. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 8290–8303, Singapore. Association for Computational Linguistics.