% Workshop paper record (ACL Anthology export), normalised to brace-delimited,
% aligned fields. Field data is unchanged from the exported record.
% NOTE(review): the url below points at a "preview" / "ingest" host; confirm
% the canonical Anthology URL before relying on it in a published bibliography.
% NOTE(review): address appears to hold the event location rather than the
% publisher's city; confirm which one the target style expects.
@inproceedings{jiang-liang-2026-capturing,
  title     = {Capturing Epistemic Uncertainty in {LLM}-Based Soft Labeling},
  author    = {Jiang, Yanru and
               Liang, Siyu},
  editor    = {Mille, Simon and
               Gehrmann, Sebastian and
               Schmidtov{\'a}, Patr{\'i}cia and
               Du{\v{s}}ek, Ond{\v{r}}ej and
               Fadaee, Marzieh and
               Lo, Kyle and
               Santus, Enrico and
               Stanovsky, Gabriel},
  booktitle = {Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics ({GEM})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.gem-main.21/},
  pages     = {177--190},
  isbn      = {979-8-89176-423-1},
  abstract  = {In many human-annotated NLP tasks involving ambiguity or subjective judgment, annotator disagreement reflects epistemic uncertainty rather than noise. Soft labeling (SL), which represents annotations as probability distributions rather than majority-vote (MV) labels, preserves this uncertainty and can improve downstream performance. We extend this perspective to LLM-based annotation by formalizing LLM soft labeling as introducing controlled variation in model-generated annotations to approximate the latent variability underlying human annotations. We distinguish two sources of variation: model-induced (e.g., stochastic decoding and model ensembles) and human-approximated (e.g., persona prompting and human-calibrated in-context annotation). Using the Gab Hate and GoEmotions datasets, we show that SL training consistently outperforms MV training under stronger LLM-based annotation strategies. Model ensembles produce the most informative soft-label distributions, achieving the best human{--}LLM agreement and downstream classification performance. These findings suggest that scalable LLM-based annotation pipelines can model epistemic uncertainty through diverse model-level variation without explicitly simulating human attributes.},
}

Markdown (Informal)
[Capturing Epistemic Uncertainty in LLM-Based Soft Labeling](https://preview.aclanthology.org/ingest-acl-workshops/2026.gem-main.21/) (Jiang & Liang, GEM 2026)
ACL