% NAACL 2025 long paper (ACL Anthology ID 2025.naacl-long.411).
% The url field uses the canonical aclanthology.org address rather than a
% temporary preview-branch build, so the link stays valid over time.
@inproceedings{sharma-etal-2025-faux,
  title     = {Faux Polyglot: A Study on Information Disparity in Multilingual Large Language Models},
  author    = {Sharma, Nikhil and
               Murray, Kenton and
               Xiao, Ziang},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-long.411/},
  pages     = {8090--8107},
  isbn      = {979-8-89176-189-6},
  abstract  = {Although the multilingual capability of LLMs offers new opportunities to overcome the language barrier, do these capabilities translate into real-life scenarios where linguistic divide and knowledge conflicts between multilingual sources are known occurrences? In this paper, we studied LLM{'}s linguistic preference in a cross-language RAG-based information search setting. We found that LLMs displayed systemic bias towards information in the same language as the query language in both document retrieval and answer generation. Furthermore, in scenarios where no information is in the language of the query, LLMs prefer documents in high-resource languages during generation, potentially reinforcing the dominant views. Such bias exists for both factual and opinion-based queries. Our results highlight the linguistic divide within multilingual LLMs in information search systems. The seemingly beneficial multilingual capability of LLMs may backfire on information parity by reinforcing language-specific filter bubbles further marginalizing low-resource views.},
}
Markdown (Informal)
[Faux Polyglot: A Study on Information Disparity in Multilingual Large Language Models](https://aclanthology.org/2025.naacl-long.411/) (Sharma et al., NAACL 2025)
ACL