% SimulSeamless: FBK system description paper in the IWSLT 2024 proceedings.
% The url field points at the permanent ACL Anthology host rather than a
% preview-branch deployment; the paper identifier (2024.iwslt-1.11) is the
% same one that appears in the doi field.
@inproceedings{papi-etal-2024-simulseamless,
  title     = {{SimulSeamless}: {FBK} at {IWSLT} 2024 Simultaneous Speech Translation},
  author    = {Papi, Sara and
               Gaido, Marco and
               Negri, Matteo and
               Bentivogli, Luisa},
  editor    = {Salesky, Elizabeth and
               Federico, Marcello and
               Carpuat, Marine},
  booktitle = {Proceedings of the 21st International Conference on Spoken Language Translation ({IWSLT} 2024)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.iwslt-1.11/},
  doi       = {10.18653/v1/2024.iwslt-1.11},
  pages     = {72--79},
  abstract  = {This paper describes the FBK{'}s participation in the Simultaneous Translation Evaluation Campaign at IWSLT 2024. For this year{'}s submission in the speech-to-text translation (ST) sub-track, we propose SimulSeamless, which is realized by combining AlignAtt and SeamlessM4T in its medium configuration. The SeamlessM4T model is used `off-the-shelf' and its simultaneous inference is enabled through the adoption of AlignAtt, a SimulST policy based on cross-attention that can be applied without any retraining or adaptation of the underlying model for the simultaneous task. We participated in all the Shared Task languages (English-{\ensuremath{>}}German, Japanese, Chinese, and Czech-{\ensuremath{>}}English), achieving acceptable or even better results compared to last year{'}s submissions. SimulSeamless, covering more than 143 source languages and 200 target languages, is released at: https://github.com/hlt-mt/FBK-fairseq/.},
}
Markdown (Informal)
[SimulSeamless: FBK at IWSLT 2024 Simultaneous Speech Translation](https://aclanthology.org/2024.iwslt-1.11/) (Papi et al., IWSLT 2024)
ACL
- Sara Papi, Marco Gaido, Matteo Negri, and Luisa Bentivogli. 2024. SimulSeamless: FBK at IWSLT 2024 Simultaneous Speech Translation. In Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024), pages 72–79, Bangkok, Thailand (in-person and online). Association for Computational Linguistics.