@inproceedings{cruz-etal-2025-improving,
title = "Improving Large Language Model Confidence Estimates using Extractive Rationales for Classification",
author = "Cruz, Jane Arleth Dela and
Hendrickx, Iris and
Larson, Martha",
editor = "Dhole, Kaustubh and
Clinciu, Miruna",
booktitle = "Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})",
month = jul,
year = "2025",
address = "Vienna, Austria and virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/corrections-2025-08/2025.gem-1.49/",
pages = "549--560",
ISBN = "979-8-89176-261-9",
    abstract = "The adoption of large language models (LLMs) in high-stakes scenarios continues to be a challenge due to the lack of effective confidence calibration. Although LLMs are capable of providing convincing self-explanations and verbalizing confidence in NLP tasks, they tend to exhibit overconfidence when using generative or free-text rationales (e.g., Chain-of-Thought), where reasoning steps tend to lack verifiable grounding. In this paper, we investigate whether adding explanations in the form of extractive rationales {--} snippets of the input text that directly support the predictions {--} can improve the confidence calibration of LLMs in classification tasks. We examine two approaches for integrating these rationales: (1) a one-stage rationale-generation with prediction and (2) a two-stage rationale-guided confidence calibration. We evaluate these approaches on a disaster tweet classification task using four different off-the-shelf LLMs. Our results show that extracting rationales both before and after prediction can improve the confidence estimates of the LLMs. Furthermore, we find that replacing valid extractive rationales with irrelevant ones significantly lowers model confidence, highlighting the importance of rationale quality. This simple yet effective method improves LLM verbalized confidence and reduces overconfidence in possible hallucinations."
}
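The two-stage rationale-guided approach described in the abstract could look roughly like the sketch below. This is not code from the paper: it assumes a generic `llm(prompt) -> str` wrapper around whatever off-the-shelf model is used, and the prompt wording, function names, and output-parsing are illustrative assumptions only.

```python
import re


def llm(prompt: str) -> str:
    """Assumed interface: prompt text in, model completion out (any off-the-shelf LLM)."""
    raise NotImplementedError("Wire this up to your model of choice.")


def predict_with_rationale(tweet: str) -> tuple[str, str]:
    """Stage 1 (sketch): ask the model to quote a supporting snippet and classify the tweet."""
    prompt = (
        "Classify the tweet as 'disaster' or 'not disaster'.\n"
        "First copy the exact snippet from the tweet that supports your decision.\n"
        f"Tweet: {tweet}\n"
        "Answer in the format:\nRationale: <verbatim snippet>\nLabel: <disaster|not disaster>"
    )
    reply = llm(prompt)
    # Naive parsing; assumes the model follows the requested format.
    rationale = re.search(r"Rationale:\s*(.*)", reply).group(1).strip()
    label = re.search(r"Label:\s*(.*)", reply).group(1).strip()
    return label, rationale


def rationale_guided_confidence(tweet: str, label: str, rationale: str) -> float:
    """Stage 2 (sketch): elicit a verbalized confidence conditioned on the extracted rationale."""
    prompt = (
        f"Tweet: {tweet}\n"
        f"Predicted label: {label}\n"
        f"Supporting snippet: {rationale}\n"
        "Considering only how well the snippet supports the label, "
        "state your confidence as a number between 0 and 1.\nConfidence:"
    )
    reply = llm(prompt)
    match = re.search(r"\d*\.?\d+", reply)
    return float(match.group()) if match else 0.5  # fall back to 0.5 if no number is returned
```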
Markdown (Informal)
[Improving Large Language Model Confidence Estimates using Extractive Rationales for Classification](https://preview.aclanthology.org/corrections-2025-08/2025.gem-1.49/) (Cruz et al., GEM 2025)