% Conference-paper entry (Findings of ACL 2026).
% The accented letter in the second author's given name is written as the
% BibTeX special character {\'o} so it typesets and sorts as a single letter.
% NOTE(review): the url field points at a preview/ingest host -- confirm the
% canonical URL before relying on it.
@inproceedings{sultan-astudillo-2026-confidence,
    title = "Confidence-Weighted Token Set Cover for Early Hypothesis Pruning in Self-Consistency",
    author = "Sultan, Md Arafat and
      Astudillo, Ram{\'o}n Fernandez",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl/2026.findings-acl.2046/",
    pages = "41148--41155",
    ISBN = "979-8-89176-395-1",
    abstract = "Despite its simplicity and efficacy, the high token expenditure of self-consistency can limit its practical utility. We investigate whether early hypothesis pruning can improve the token efficiency of self-consistency for long chain-of-thought reasoning tasks, while preserving its parallelism. Concretely, we generate all solutions in parallel but periodically prune intermediate hypotheses based on two lightweight indicators: (a) the model{'}s confidence in each hypothesis, and (b) the lexical coverage of all current hypotheses by candidate subsets. We design a fast weighted set cover algorithm that utilizes the two indicators; evaluation of five LLMs on three math benchmarks shows that our method improves token efficiency in most cases, with reductions of 10-35{\%} in many."
}
Markdown (Informal)
[Confidence-Weighted Token Set Cover for Early Hypothesis Pruning in Self-Consistency](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.2046/) (Sultan & Astudillo, Findings 2026)
ACL