@comment{ACL Anthology record 2026.iwslt-1.36 (paper in the IWSLT 2026 proceedings).
  The url field uses the canonical Anthology form derived from the record ID in the DOI
  rather than a preview.aclanthology.org staging-branch address.
  NOTE(review): confirm the canonical address resolves once the volume is live.}
@inproceedings{shah-etal-2026-tie,
  title     = {Tie-Calibrated {COMETKiwi} for Speech Translation Quality Estimation: {IWSLT2026} Metrics Track},
  author    = {Shah, Mubashir Hussain and
               Fatima, Aymen and
               Choi, Kiho and
               Jang, Daehee},
  editor    = {Salesky, Elizabeth and
               Anastasopoulos, Antonios and
               Negri, Matteo and
               Federico, Marcello},
  booktitle = {Proceedings of the 23rd International Conference on Spoken Language Translation ({IWSLT} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.iwslt-1.36/},
  doi       = {10.18653/v1/2026.iwslt-1.36},
  pages     = {318--322},
  isbn      = {979-8-89176-411-8},
  abstract  = {We describe our submission to the IWSLT 2026 Speech Translation Metrics shared task, which targets reference-free quality estimation for English-to-German and English-to-Chinese speech translation. Our primary system combines COMETKiwi-22, applied to ASR transcripts, with a lightweight post-processing step called tie calibration: a learned score-bucketing that collapses near-identical scores into exact ties, reducing noisy within-document pairwise ranking errors. On the official development set the method achieves a segment-level Kendall tau-b of 39.4{\%} on average, compared to 34.6{\%} for plain COMETKiwi, 29.2{\%} for SpeechQE, and 24.4{\%} for BLASER 2.0 QE. System-level Soft Pairwise Accuracy is 88.0{\%}, comparable to COMETKiwi (89.4{\%}) and above SpeechQE (86.0{\%}). The method requires no audio, no retraining, and one hyperparameter per target language tuned entirely on the training split.},
}
Markdown (Informal)
[Tie-Calibrated COMETKiwi for Speech Translation Quality Estimation: IWSLT2026 Metrics Track](https://preview.aclanthology.org/bulk-corrections-2026-07-02/2026.iwslt-1.36/) (Shah et al., IWSLT 2026)
ACL