@inproceedings{podolak-verma-2025-read,
title = "Read Your Own Mind: Reasoning Helps Surface Self-Confidence Signals in {LLM}s",
author = "Podolak, Jakub and
Verma, Rajeev",
editor = "Noidea, Noidea",
booktitle = "Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025)",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/name-variant-enfa-fane/2025.uncertainlp-main.21/",
doi = "10.18653/v1/2025.uncertainlp-main.21",
pages = "247--258",
ISBN = "979-8-89176-349-4",
abstract = "We study the source of uncertainty in DeepSeek R1-32B by analyzing its self-reported verbal confidence on question answering (QA) tasks. In the default answer-then-confidence setting, the model is regularly over-confident, whereas semantic entropy - obtained by sampling many responses - remains reliable. We hypothesize that this is because of semantic entropy{'}s larger test-time compute, which lets us explore the model{'}s predictive distribution. We show that granting DeepSeek the budget to explore its distribution by forcing a long chain-of-thought before the final answer greatly improves its verbal score effectiveness, even on simple fact-retrieval questions that normally require no reasoning. Our analysis concludes that reliable uncertainty estimation requires explicit exploration of the generative space, and self-reported confidence is trustworthy only after such exploration."
}
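The abstract contrasts one-shot verbal confidence with semantic entropy obtained by sampling many responses, and with a forced long chain of thought before the final answer. The sketch below is a minimal Python illustration of those two ideas, not the authors' implementation: the REASON_THEN_CONFIDENCE template is an assumed prompt wording, and normalized exact match stands in for the semantic-equivalence clustering the paper uses.

import math
from collections import Counter

# Assumed illustration of the forced chain-of-thought setup: the model
# must reason at length before stating an answer and a self-reported
# confidence. The exact wording here is hypothetical.
REASON_THEN_CONFIDENCE = (
    "Think step by step, exploring plausible answers, before committing.\n"
    "Question: {question}\n"
    "Reasoning: <long chain of thought>\n"
    "Final answer: <answer>\n"
    "Confidence (0-100): <score>"
)

def semantic_entropy(answers: list[str]) -> float:
    """Entropy over clusters of sampled answers to one question.

    The paper clusters answers by semantic equivalence; normalized
    exact match below is only a rough proxy for that step.
    """
    clusters = Counter(a.strip().rstrip(".").lower() for a in answers)
    total = sum(clusters.values())
    return -sum((n / total) * math.log(n / total) for n in clusters.values())

# Example: low entropy means the sampled answers mostly agree, which
# the abstract describes as the reliable confidence signal.
samples = ["Paris", "paris", "Paris.", "Lyon"]
print(f"semantic entropy: {semantic_entropy(samples):.3f}")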