% Pinch-AST system description paper, ACL Anthology ID 2026.iwslt-1.30.
% The url field uses the canonical Anthology address instead of a temporary
% preview-build address; the doi field identifies the same record.
@inproceedings{bentes-safka-2026-pinch,
    title     = {Pinch-{AST}: Robust Cascaded Speech Translation System for the {IWSLT} 2026 Simultaneous Speech Translation Task},
    author    = {Bentes, Carlos and
                 Safka, Christian},
    editor    = {Salesky, Elizabeth and
                 Anastasopoulos, Antonios and
                 Negri, Matteo and
                 Federico, Marcello},
    booktitle = {Proceedings of the 23rd International Conference on Spoken Language Translation ({IWSLT} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, USA (in-person and online)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.iwslt-1.30/},
    doi       = {10.18653/v1/2026.iwslt-1.30},
    pages     = {268--271},
    isbn      = {979-8-89176-411-8},
    abstract  = {We describe Pinch-AST, our submission to the IWSLT 2026 Simultaneous Speech-to-Text Translation shared task, covering all four official directions (En {\textrightarrow} De, En {\textrightarrow} It, En {\textrightarrow} Zh, Cs {\textrightarrow} En) under both low- and high-latency regimes. Pinch-AST is a cascaded system pairing off-the-shelf speech models with a translation backbone adapted per language pair via LoRA on ASR-noise-augmented parallel data. The streaming policy is a character-level longest-common-prefix re-translation strategy, and the full pipeline runs on a single H100 80 GB GPU within the real-time budget. Evaluated on the IWSLT 2026 development set, Pinch-AST achieves competitive quality{--}latency trade-offs across all four language pairs in both latency regimes.},
}
Markdown (Informal)
[Pinch-AST: Robust Cascaded Speech Translation System for the IWSLT 2026 Simultaneous Speech Translation Task](https://aclanthology.org/2026.iwslt-1.30/) (Bentes & Safka, IWSLT 2026)
ACL