@inproceedings{duan-zhao-2023-encoder,
title = "Encoder and Decoder, Not One Less for Pre-trained Language Model Sponsored {NMT}",
author = "Duan, Sufeng and
Zhao, Hai",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.findings-acl.222/",
doi = "10.18653/v1/2023.findings-acl.222",
pages = "3602--3613",
abstract = "Well pre-trained contextualized representations from pre-trained language model (PLM) have been shown helpful for enhancing various natural language processing tasks, surely including neural machine translation (NMT). However, existing methods either consider encoder-only enhancement or rely on specific multilingual PLMs, which leads to a much larger model or give up potentially helpful knowledge from target PLMs. In this paper, we propose a new monolingual PLM-sponsored NMT model to let both encoder and decoder enjoy PLM enhancement to alleviate such obvious inconvenience. Especially, incorporating a newly proposed frequency-weighted embedding transformation algorithm, PLM embeddings can be effectively exploited in terms of the representations of the NMT decoder. We evaluate our model on IWSLT14 En-De, De-En, WMT14 En-De, and En-Fr tasks, and the results show that our proposed PLM enhancement gives significant improvement and even helps achieve new state-of-the-art."
}