@inproceedings{guo-etal-2025-benchmarking,
title = "Benchmarking Uncertainty Metrics for {LLM} Target-Aware Search",
author = "Guo, Pei-Fu and
Tsai, Yun-Da and
Lin, Shou-De",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.226/",
doi = "10.18653/v1/2025.findings-emnlp.226",
pages = "4230--4238",
ISBN = "979-8-89176-335-7",
abstract = "LLM search methods, such as Chain of Thought (CoT) and Tree of Thought (ToT), enhance LLM reasoning by exploring multiple reasoning paths. When combined with search algorithms like MCTS and Bandit methods, their effectiveness relies heavily on uncertainty estimation to prioritize paths that align with specific search objectives. \textit{However, it remains unclear whether existing LLM uncertainty metrics adequately capture the diverse types of uncertainty required to guide different search objectives.} In this work, we introduce a framework for uncertainty benchmarking, identifying four distinct uncertainty types: Answer, Correctness, Aleatoric, and Epistemic Uncertainty. Each type serves different optimization goals in search. Our experiments demonstrate that current metrics often align with only a subset of these uncertainty types, limiting their effectiveness for objective-aligned search in some cases. These findings highlight the need for additional target-aware uncertainty estimators that can adapt to various optimization goals in LLM search."
}