@inproceedings{liu-etal-2021-cross,
  title     = {Cross Attention Augmented Transducer Networks for Simultaneous Translation},
  author    = {Liu, Dan and
               Du, Mengge and
               Li, Xiaoxi and
               Li, Ya and
               Chen, Enhong},
  editor    = {Moens, Marie-Francine and
               Huang, Xuanjing and
               Specia, Lucia and
               Yih, Scott Wen-tau},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2021},
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.emnlp-main.4/},
  doi       = {10.18653/v1/2021.emnlp-main.4},
  pages     = {39--55},
  abstract  = {This paper proposes a novel architecture, Cross Attention Augmented Transducer (CAAT), for simultaneous translation. The framework aims to jointly optimize the policy and translation models. To effectively consider all possible READ-WRITE simultaneous translation action paths, we adapt the online automatic speech recognition (ASR) model, RNN-T, but remove the strong monotonic constraint, which is critical for the translation task to consider reordering. To make CAAT work, we introduce a novel latency loss whose expectation can be optimized by a forward-backward algorithm. We implement CAAT with Transformer while the general CAAT architecture can also be implemented with other attention-based encoder-decoder frameworks. Experiments on both speech-to-text (S2T) and text-to-text (T2T) simultaneous translation tasks show that CAAT achieves significantly better latency-quality trade-offs compared to the state-of-the-art simultaneous translation approaches.},
}
Markdown (Informal)
[Cross Attention Augmented Transducer Networks for Simultaneous Translation](https://aclanthology.org/2021.emnlp-main.4/) (Liu et al., EMNLP 2021)
ACL