% Liang et al., ACL 2023 (Volume 1: Long Papers): the DEER fine-tuning method
% for dynamic and efficient non-autoregressive inference with BERT-family models.
% The url field uses the permanent ACL Anthology address rather than a
% temporary preview-build address.
@inproceedings{liang-etal-2023-dynamic,
  title     = {Dynamic and Efficient Inference for Text Generation via {BERT} Family},
  author    = {Liang, Xiaobo and
               Li, Juntao and
               Wu, Lijun and
               Cao, Ziqiang and
               Zhang, Min},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.acl-long.162/},
  doi       = {10.18653/v1/2023.acl-long.162},
  pages     = {2883--2897},
  abstract  = {Despite the excellent performance of Pre-trained Language Models on many text generation tasks, they suffer from inefficient inference on computation and memory due to their large-scale parameters and the universal autoregressive decoding paradigm. In this work, we propose a novel fine-tuning method \textbf{DEER}, which can make a single pre-trained model support \textbf{D}ynamic and \textbf{E}fficient inf\textbf{ER}ence and achieve an adaptive trade-off between model performance and latency. In particular, our critical insight is to jointly utilize the non-autoregressive (NAR) generation and dynamic parameter pruning techniques, which can flexibly control the decoding iteration steps and model sizes according to memory and latency limitations. Besides, we also explore the effectiveness of the pre-trained MLMs (i.e., the BERT family) for text generation tasks since their bidirectional attention nature is more suitable for the NAR training objective. Extensive experiments on both monolingual and multilingual pre-trained MLMs demonstrate the effectiveness of our proposed DEER method by consistently achieving (1) higher BLEU scores than the strong autoregressive Transformer model on three neural machine translation tasks with 3 $\to$ 12 times speedup, (2) competitive performance (but with much faster inference speed) compared with the BART model on four GLGE benchmark tasks. Our code will be publicly available at GitHub \url{https://github.com/dropreg/DEER}.}
}
Markdown (Informal)
[Dynamic and Efficient Inference for Text Generation via BERT Family](https://aclanthology.org/2023.acl-long.162/) (Liang et al., ACL 2023)
ACL