@inproceedings{nie-etal-2025-mechanistic,
    title = "Mechanistic Understanding and Mitigation of Language Confusion in {E}nglish-Centric Large Language Models",
    author = "Nie, Ercong and
      Schmid, Helmut and
      Sch{\"u}tze, Hinrich",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-emnlp.37/",
    doi = "10.18653/v1/2025.findings-emnlp.37",
    pages = "690--706",
    isbn = "979-8-89176-335-7",
    abstract = "Language confusion{---}where large language models (LLMs) generate unintended languages against the user{'}s need{---}remains a critical challenge, especially for English-centric models. We present the first mechanistic interpretability (MI) study of language confusion, combining behavioral benchmarking with neuron-level analysis. Using the Language Confusion Benchmark (LCB), we show that confusion points (CPs){---}specific positions where language switches occur{---}are central to this phenomenon. Through layer-wise analysis with TunedLens and targeted neuron attribution, we reveal that transition failures in the final layers drive confusion. We further demonstrate that editing a small set of critical neurons, identified via comparative analysis with a multilingual-tuned counterpart, substantially mitigates confusion while largely preserving general competence and fluency. Our approach matches multilingual alignment in confusion reduction for many languages and yields cleaner, higher-quality outputs. These findings provide new insights into the internal dynamics of LLMs and highlight neuron-level interventions as a promising direction for robust, interpretable multilingual language modeling."
}
@comment{
  Informal Markdown citation (ACL Anthology export residue, kept for reference):
  [Mechanistic Understanding and Mitigation of Language Confusion in English-Centric Large Language Models](https://aclanthology.org/2025.findings-emnlp.37/) (Nie et al., Findings 2025)
  ACL
}