@inproceedings{lee-etal-2025-multilingual,
  title     = {Multilingual, Not Multicultural: Uncovering the Cultural Empathy Gap in {LLM}s through a Comparative Empathetic Dialogue Benchmark},
  author    = {Lee, Woojin and
               Sim, Yujin and
               Kim, Hongjin and
               Kim, Harksoo},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.44/},
  pages     = {791--809},
  isbn      = {979-8-89176-298-5},
  abstract  = {Large Language Models (LLMs) demonstrate remarkable multilingual capabilities, yet it remains unclear whether they are truly multicultural. Do they merely process different languages, or can they genuinely comprehend the unique cultural contexts embedded within them? This study investigates this critical question by examining whether LLM{'}s perception of emotion and empathy differs across linguistic and cultural boundaries. To facilitate this, we introduce the Korean Empathetic Dialogues (KoED), a benchmark extending the English-based EmpatheticDialogues (ED) dataset. Moving beyond direct translation, we meticulously reconstructed dialogues specifically selected for their potential for cultural adaptation, aligning them with Korean emotional nuances and incorporating key cultural concepts like `jeong' and `han' that lack direct English equivalents. Our cross-cultural evaluation of leading multilingual LLMs reveals a significant ``cultural empathy gap'': models consistently underperform on KoED compared to ED, struggling especially with uniquely Korean emotional expressions. Notably, the Korean-centric model, EXAONE, exhibits significantly higher cultural appropriateness. This result provides compelling evidence that aligns with the ``data provenance effect'', suggesting that the cultural alignment of pre-training data is a critical factor for genuine empathetic communication. These findings demonstrate that current LLMs have cultural blind spots and underscore the necessity of benchmarks like KoED to move beyond simple linguistic fluency towards truly culturally adaptive AI systems.},
}
@comment{Markdown (Informal)}
@comment{
  [Multilingual, Not Multicultural: Uncovering the Cultural Empathy Gap in LLMs through a Comparative Empathetic Dialogue Benchmark](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.44/) (Lee et al., IJCNLP-AACL 2025)
  ACL
}