@inproceedings{liang-etal-2022-janus,
title = "{JANUS}: Joint Autoregressive and Non-autoregressive Training with Auxiliary Loss for Sequence Generation",
author = "Liang, Xiaobo and
Wu, Lijun and
Li, Juntao and
Zhang, Min",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.emnlp-main.550/",
doi = "10.18653/v1/2022.emnlp-main.550",
pages = "8050--8060",
abstract = "Transformer-based autoregressive and non-autoregressive models have played an essential role in sequence generation tasks. The autoregressive model can obtain excellent performance, while the non-autoregressive model brings fast decoding speed for inference. In this paper, we propose \textbf{JANUS}, a \textbf{J}oint \textbf{A}utoregressive and \textbf{N}on-autoregressive training method using a\textbf{U}xiliary los\textbf{S} to enhance the model performance in both AR and NAR manner simultaneously and effectively alleviate the problem of distribution discrepancy.Further, we pre-train BART with JANUS on a large corpus with minimal cost (16 GPU days) and make the BART-JANUS capable of non-autoregressive generation, demonstrating that our approach can transfer the AR knowledge to NAR. Empirically, we show our approach and BART-JANUS can achieve significant improvement on multiple generation tasks, including machine translation and GLGE benchmarks. Our code is available at Github."
}
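The following is a minimal PyTorch-style sketch of the general idea the abstract describes: training a single model with both an autoregressive (AR) loss and a non-autoregressive (NAR) loss, plus an auxiliary term that reduces the discrepancy between the two output distributions. The function name `janus_style_loss`, the `model(src, tgt_in, causal=...)` interface, the choice of a KL term as the auxiliary loss, and the weights `alpha` and `beta` are all assumptions for illustration; the paper's actual formulation is not given in the abstract.

```python
# Illustrative sketch only -- not the authors' implementation of JANUS.
import torch
import torch.nn.functional as F

def janus_style_loss(model, src, tgt, pad_id, mask_id, alpha=0.5, beta=0.1):
    """Combine AR, NAR, and auxiliary consistency losses for one batch.

    `model(src, tgt_in, causal=...)` is assumed to return per-token logits:
    causal=True runs teacher forcing with a causal mask (AR pass), while
    causal=False decodes all positions in parallel from a masked input (NAR pass).
    """
    tgt_in, tgt_out = tgt[:, :-1], tgt[:, 1:]

    # AR pass: standard teacher-forced cross-entropy.
    ar_logits = model(src, tgt_in, causal=True)
    ar_loss = F.cross_entropy(
        ar_logits.reshape(-1, ar_logits.size(-1)),
        tgt_out.reshape(-1),
        ignore_index=pad_id,
    )

    # NAR pass: predict every target token in parallel from a fully masked input.
    masked_in = torch.full_like(tgt_in, mask_id)
    nar_logits = model(src, masked_in, causal=False)
    nar_loss = F.cross_entropy(
        nar_logits.reshape(-1, nar_logits.size(-1)),
        tgt_out.reshape(-1),
        ignore_index=pad_id,
    )

    # Auxiliary term (an assumed choice): KL between the NAR and AR output
    # distributions, one simple way to narrow the distribution discrepancy
    # the abstract mentions.
    aux_loss = F.kl_div(
        F.log_softmax(nar_logits, dim=-1),
        F.softmax(ar_logits.detach(), dim=-1),
        reduction="batchmean",
    )

    return ar_loss + alpha * nar_loss + beta * aux_loss
```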