% EACL 2026 long paper (Volume 1), entry taken from the ACL Anthology export.
% NOTE(review): the url field points at an Anthology preview/ingest branch;
%   confirm the canonical link (and a DOI, if one is assigned) before release.
% NOTE(review): the third editor's surname is stored without an accent on the
%   "a"; confirm the intended spelling against the proceedings front matter.
@inproceedings{kingetsu-etal-2026-look,
  title     = {Look Before You Leap: A Lookahead Reasoning Quality Gate for Speculative Decoding},
  author    = {Kingetsu, Hiroaki and
               Yokoo, Kaoru and
               Fukumizu, Kenji and
               Kaul, Manohar},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'i}s},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.367/},
  pages     = {7831--7847},
  isbn      = {979-8-89176-380-7},
  abstract  = {We present a lookahead quality gate (verifier) for speculative decoding for reasoning or chain-of-thought language models. The gate accepts the longest reliable prefix of each k-token lookahead (block-wise) draft. Unlike token-level likelihood search, which is myopic and often rewards verbosity, or tree-level sampling methods that trade accuracy for latency, our approach works at an intermediate granularity. It uses only the base model{'}s hidden states to compute a geometry-based quality score for each prefix, then accepts the longest prefix whose score exceeds a quantile-calibrated threshold estimated from unlabeled prompts. The method integrates seamlessly with speculative/blockwise decoding and adds minimal runtime overhead, requiring no auxiliary heads, reward models, or finetuning. On math and science benchmarks, it improves accuracy over sampling baselines while achieving 2.6--7.9$\times$ faster generation.},
}
Markdown (Informal)
[Look Before You Leap: A Lookahead Reasoning Quality Gate for Speculative Decoding](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.367/) (Kingetsu et al., EACL 2026)
ACL