@inproceedings{liu-etal-2021-ustc,
title = "The {USTC}-{NELSLIP} Systems for Simultaneous Speech Translation Task at {IWSLT} 2021",
author = "Liu, Dan and
Du, Mengge and
Li, Xiaoxi and
Hu, Yuchen and
Dai, Lirong",
editor = "Federico, Marcello and
Waibel, Alex and
Costa-juss{\`a}, Marta R. and
Niehues, Jan and
      St{\"u}ker, Sebastian and
Salesky, Elizabeth",
booktitle = "Proceedings of the 18th International Conference on Spoken Language Translation (IWSLT 2021)",
month = aug,
year = "2021",
address = "Bangkok, Thailand (online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.iwslt-1.2",
doi = "10.18653/v1/2021.iwslt-1.2",
pages = "30--38",
    abstract = "This paper describes USTC-NELSLIP{'}s submissions to the IWSLT2021 Simultaneous Speech Translation task. We proposed a novel simultaneous translation model, Cross-Attention Augmented Transducer (CAAT), which extends conventional RNN-T to sequence-to-sequence tasks without monotonic constraints, e.g., simultaneous translation. Experiments on speech-to-text (S2T) and text-to-text (T2T) simultaneous translation tasks show that CAAT achieves better quality-latency trade-offs compared to \textit{wait-k}, one of the previous state-of-the-art approaches. Based on the CAAT architecture and data augmentation, we build S2T and T2T simultaneous translation systems in this evaluation campaign. Compared to last year{'}s optimal systems, our S2T simultaneous translation system improves by an average of 11.3 BLEU for all latency regimes, and our T2T simultaneous translation system improves by an average of 4.6 BLEU.",
}