@comment{NOTE(review): the url field below points at a preview.aclanthology.org
staging build (ingest-acl). Presumably it should become
https://aclanthology.org/2026.findings-acl.379/ once the volume is published;
confirm before relying on it. Markdown bold markers were stripped from the
abstract because BibTeX/LaTeX would print the asterisks literally.}
@inproceedings{wang-etal-2026-temperature,
  title     = {On Temperature-Constrained Non-Deterministic Machine Translation: Potential and Evaluation},
  author    = {Wang, Weichuan and
               Liu, Mingyang and
               Ma, Chen and
               Song, Linqi},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.379/},
  pages     = {7677--7701},
  isbn      = {979-8-89176-395-1},
  abstract  = {In recent years, the non-deterministic properties of language models have garnered considerable attention and have shown a significant influence on real-world applications. However, such properties remain under-explored in machine translation (MT), a complex, non-deterministic NLP task. In this study, we systematically evaluate modern MT systems and identify temperature-constrained Non-Deterministic MT (ND-MT) as a distinct phenomenon. Additionally, we demonstrate that ND-MT exhibits significant potential in addressing the multimodality issue that has long challenged MT research and provides higher-quality candidates than Deterministic MT (D-MT) under temperature constraints. However, ND-MT introduces new challenges in evaluating system performance. Specifically, the evaluation framework designed for D-MT fails to yield consistent evaluation results when applied to ND-MT. We further investigate this emerging challenge by evaluating state-of-the-art ND-MT systems using both lexical-based and semantic-based metrics at varying sampling sizes. The results reveal a Buckets Effect across these systems: the ranking of ND-MT systems is dominated by the worst-quality candidate translation, as shown by automatic evaluation metrics. To mitigate this issue, we propose ExpectoSample, a strategy that first identifies reliable metrics and then enables robust ND-MT system selection for real-world.},
}
Markdown (Informal)
[On Temperature-Constrained Non-Deterministic Machine Translation: Potential and Evaluation](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.379/) (Wang et al., Findings 2026)
ACL