% Conference paper entry in ACL Anthology export format.
% NOTE(review): the url points at a preview/ingest host and its id contains
% "acl-srw" while booktitle names the main ACL 2026 proceedings -- confirm the
% final Anthology URL and volume before citing.
% Abstract: only extraction artifacts (fused words, broken number range) were
% repaired; the wording is otherwise left as exported.
@inproceedings{sainath-2026-convergent,
    title = "Convergent Demographic Utility Hierarchies: Geometry of Intersectional Values in {LLM}s",
    author = "Sainath, Pravish",
    editor = "T.Y.S.S., Santosh and
      Rodriguez, Juan Diego and
      de Gibert, Ona",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl/2026.acl-srw.122/",
    pages = "1376--1390",
    isbn = "979-8-89176-393-7",
    abstract = "Recent work has shown that LLMs develop internally coherent utility functions that emerge with scale, yet whether these value systems encode systematic demographic hierarchies remains unexplored. We elicit pairwise preferences across 15 intersectional demographic groups (defined by race, gender, and their combinations) and 8 policy domains on three 7{--}8B instruction-tuned LLMs, fitting Thurstonian utility models to the resulting preference matrices. All three models converge on a compensatory hierarchy that inverts real-world structural advantage, consistently ranking marginalized groups, the highest and dominant groups are lowest. Intersectional utilities do not combine additively: single-axis audits that measure gender and race gaps independently overestimate the most extreme intersectional gap by 26{--}40{\%} in our experiments. Geometrically, we identify a linear direction in the representation space that predicts the full utility hierarchy from neutral sentences alone, and show that this direction is substantially aligned with gender encoding but not with race encoding. Orthogonalization reveals that gender separation in representations is not fully explained by utility encoding. The hierarchy is already present in base (pre-alignment) models and is amplified several-fold by instruction tuning, suggesting it originates in pre-training data rather than alignment procedures."
}
Markdown (Informal)
[Convergent Demographic Utility Hierarchies: Geometry of Intersectional Values in LLMs](https://preview.aclanthology.org/ingest-acl/2026.acl-srw.122/) (Sainath, ACL 2026)
ACL