@inproceedings{kim-etal-2026-query,
    title     = {Query-Following vs Context-Anchoring: How {LLM}s Handle Cross-Turn Language Switching},
    author    = {Kim, Kyuhee and
                 Chen, Chengheng Li and
                 Sotnikova, Anna},
    editor    = {Chen, Pinzhen and
                 Zouhar, Vil{\'e}m and
                 Hu, Hanxu and
                 Khanuja, Simran and
                 Zhu, Wenhao and
                 Haddow, Barry and
                 Birch, Alexandra and
                 Aji, Alham Fikri and
                 Sennrich, Rico and
                 Hooker, Sara},
    booktitle = {Proceedings of the First Workshop on Multilingual Multicultural Evaluation},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/manual-author-scripts/2026.mme-main.13/},
    pages     = {196--203},
    isbn      = {979-8-89176-368-5},
    abstract  = {When multilingual users switch languages mid-conversation, how should LLMs respond? We extend MultiChallenge to evaluate cross-turn language switching, translating 182 multi-turn conversations into German, Chinese, Spanish, and Arabic. Across five frontier models, we observe asymmetric behavior: switching into a foreign language (EN{\textrightarrow}X) yields high query-language fidelity (89{--}99{\%}), but switching back to English (X{\textrightarrow}EN) reveals divergent policies. GPT-5 follows the query language ($>$95{\%}), while Claude Opus 4.5 and Command R+ maintain the established conversation language ($<$8{\%}). Task accuracy remains stable across conditions regardless of language selection differences. A simple explicit system prompt shows limited effectiveness in modifying these defaults.},
}

Markdown (Informal)
[Query-Following vs Context-Anchoring: How LLMs Handle Cross-Turn Language Switching](https://preview.aclanthology.org/manual-author-scripts/2026.mme-main.13/) (Kim et al., MME 2026)
ACL