@inproceedings{meng-anastasopoulos-2025-gmu,
title = "{GMU} Systems for the {IWSLT} 2025 Low-Resource Speech Translation Shared Task",
author = "Meng, Chutong and
Anastasopoulos, Antonios",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Anastasopoulos, Antonis",
booktitle = "Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria (in-person and online)",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.iwslt-1.29/",
pages = "289--300",
ISBN = "979-8-89176-272-5",
abstract = "This paper describes the GMU systems for the IWSLT 2025 low-resource speech translation shared task. We trained systems for all language pairs, except for Levantine Arabic. We fine-tuned SeamlessM4T-v2 for automatic speech recognition (ASR), machine translation (MT), and end-to-end speech translation (E2E ST). The ASR and MT models are also used to form cascaded ST systems. Additionally, we explored various training paradigms for E2E ST fine-tuning, including direct E2E fine-tuning, multi-task training, and parameter initialization using components from fine-tuned ASR and/or MT models. Our results show that (1) direct E2E fine-tuning yields strong results; (2) initializing with a fine-tuned ASR encoder improves ST performance on languages SeamlessM4T-v2 has not been trained on; (3) multi-task training can be slightly helpful."
}