% Conference paper in the LREC 2026 main proceedings; typed as an inproceedings entry so
% that the editor, address and publisher fields are used by standard styles.
% The scraped record carried volume = "main", which is not a volume number (it looks like
% the proceedings-track label seen in the URL), so it is not kept as a volume field.
% NOTE(review): the url points at a preview/ingest host; confirm the final URL once published.
@inproceedings{ataee-etal-2026-chain,
  title     = {Chain-of-Thought Reasoning Improves Context-Aware Translation with Large Language Models},
  author    = {Ataee, Shabnam and
               Huart, Hugo and
               Popescu-Belis, Andrei},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.298/},
  pages     = {3725--3741},
  abstract  = {This paper assesses the ability of large language models (LLMs) to translate texts that include inter-sentential dependencies. We use the English-French DiscEvalMT benchmark (Bawden et al., 2018) with pairs of sentences containing translation challenges for pronominal anaphora and lexical cohesion. We evaluate 12 LLMs from the DeepSeek-R1, GPT, Llama, Mistral and Phi families on two tasks: (1) distinguish a correct translation from a wrong but plausible one; and (2) generate a correct translation. We compare prompts that encourage chain-of-thought reasoning with those that do not. The best models take advantage of reasoning and reach about 90{\%} accuracy on the first task and COMET scores of about 92{\%} on the second task, with GPT-4, GPT-4o and Phi standing out. Moreover, we observe a ``wise get wiser'' effect: the improvements through reasoning are larger for models that already perform well without reasoning.},
}
Markdown (Informal)
[Chain-of-Thought Reasoning Improves Context-Aware Translation with Large Language Models](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.298/) (Ataee et al., LREC 2026)
ACL