@comment{Workshop paper in the C3NLP 2026 proceedings. NOTE(review): the url field looks like a preview/ingest address; confirm the canonical URL before relying on it.}
@inproceedings{nair-etal-2026-lost,
    title = "Lost in Translation? How Language Shapes Responsibility Attribution in Large Language Models",
    author = "Nair, Pavithra P M and
      Gressel, Gilad and
      Achuthan, Krishnashree",
    editor = "Prabhakaran, Vinodkumar and
      Dev, Sunipa and
      Benotti, Luciana and
      Hershcovich, Daniel and
      Cao, Yong and
      Zhou, Li and
      Ma, Bolei and
      Adebara, Ife",
    booktitle = "Proceedings of the 4th Workshop on Cross-Cultural Considerations in {NLP} ({C}3{NLP} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.c3nlp-1.11/",
    pages = "137--149",
    ISBN = "979-8-89176-420-0",
    abstract = "Large language models (LLMs) are increasingly deployed in multilingual settings, yet little is known about whether their moral and social judgments remain consistent across languages. In particular, when faced with moral and social dilemmas, LLMs must often implicitly or explicitly assign responsibility {---} to an individual, to broader social forces, or across multiple parties {---} a process known as responsibility attribution. This study investigates whether responsibility attributions vary across languages, whether any observed variation persists across thematic domains, and whether the degree of variation differs across LLMs. We evaluate three models (GPT-5.2, Gemini-2.5-Pro, and LLaMA-3.3-70B) across 12 scenarios spanning six thematic domains (marriage, career, authority, gender, elder care, and family). Each model was prompted to attribute responsibility for each scenario by selecting from four options: the primary individual, a secondary interpersonal actor, a broader societal factor, or distributed responsibility shared across multiple parties. Results reveal a significant overall association between language and responsibility attribution (Cram{\'e}r{'}s $V = 0.24$) that persists within every thematic domain ($V = 0.26${--}0.53). The magnitude of cross-language variation is strongly model-dependent: GPT-5.2 and Gemini-2.5-Pro show modest shifts ($V \approx 0.19$), while LLaMA-3.3-70B exhibits substantially stronger divergence ($V = 0.52$). These findings suggest that normative consistency across languages cannot be assumed and should be treated as a distinct dimension of model evaluation."
}
Markdown (Informal)
[Lost in Translation? How Language Shapes Responsibility Attribution in Large Language Models](https://preview.aclanthology.org/ingest-acl-workshops/2026.c3nlp-1.11/) (Nair et al., C3NLP 2026)
ACL