% Workshop paper: Shani and Basirat, 8th BlackboxNLP Workshop, 2025.
% Acronyms in booktitle are brace-protected so that styles which recase
% booktitle keep "BlackboxNLP" and "NLP" intact.
% NOTE(review): url uses a "preview." host with an "ingest-emnlp" path, which
% looks like a staging link -- confirm the canonical URL (or add a doi) before use.
% NOTE(review): address presumably records the workshop venue rather than the
% publisher's city -- verify against the target style's expectations.
@inproceedings{shani-basirat-2025-language,
  title     = {Language Dominance in Multilingual Large Language Models},
  author    = {Shani, Nadav and
               Basirat, Ali},
  editor    = {Belinkov, Yonatan and
               Mueller, Aaron and
               Kim, Najoung and
               Mohebbi, Hosein and
               Chen, Hanjie and
               Arad, Dana and
               Sarti, Gabriele},
  booktitle = {Proceedings of the 8th {BlackboxNLP} Workshop: Analyzing and Interpreting Neural Networks for {NLP}},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-emnlp/2025.blackboxnlp-1.7/},
  pages     = {137--148},
  isbn      = {979-8-89176-346-3},
  abstract  = {This paper investigates the language dominance hypothesis in multilingual large language models (LLMs), which posits that cross-lingual understanding is facilitated by an implicit translation into a dominant language seen more frequently during pretraining. We propose a novel approach to quantify how languages influence one another in a language model. By analyzing the hidden states across intermediate layers of language models, we model interactions between language-specific embedding spaces using Gaussian Mixture Models. Our results reveal only weak signs of language dominance in middle layers, affecting only a fraction of tokens. Our findings suggest that multilingual processing in LLMs is better explained by language-specific and shared representational spaces rather than internal translation into a single dominant language.},
}
Markdown (Informal)
[Language Dominance in Multilingual Large Language Models](https://preview.aclanthology.org/ingest-emnlp/2025.blackboxnlp-1.7/) (Shani & Basirat, BlackboxNLP 2025)
ACL