@comment{
  EMNLP 2025 main-conference paper, ACL Anthology ID 2025.emnlp-main.1026.
  The url field uses the canonical Anthology address rather than a preview
  branch, and the abstract is plain text (no exported Markdown emphasis).
}
@inproceedings{parappan-henao-2025-learning,
  title     = {Learning Subjective Label Distributions via Sociocultural Descriptors},
  author    = {Parappan, Mohammed Fayiz and
               Henao, Ricardo},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1026/},
  doi       = {10.18653/v1/2025.emnlp-main.1026},
  pages     = {20333--20349},
  isbn      = {979-8-89176-332-6},
  abstract  = {Subjectivity in NLP tasks, e.g., toxicity classification, has emerged as a critical challenge precipitated by the increased deployment of NLP systems in content-sensitive domains. Conventional approaches aggregate annotator judgements (labels), ignoring minority perspectives, and overlooking the influence of the sociocultural context behind such annotations. We propose a framework where subjectivity in binary labels is modeled as an empirical distribution accounting for the variation in annotators through human values extracted from sociocultural descriptors using a language model. The framework also allows for downstream tasks such as population and sociocultural group-level majority label prediction. Experiments on three toxicity datasets covering human-chatbot conversations and social media posts annotated with diverse annotator pools demonstrate that our approach yields well-calibrated toxicity distribution predictions across binary toxicity labels, which are further used for majority label prediction across cultural subgroups, improving over existing methods.},
}
Markdown (Informal)
[Learning Subjective Label Distributions via Sociocultural Descriptors](https://aclanthology.org/2025.emnlp-main.1026/) (Parappan & Henao, EMNLP 2025)
ACL