@inproceedings{lee-etal-2025-promtec,
  title     = {{PROMTEC}: Fast {LLM} Inference Decoding using Prompt Multi-Lookup with Template Database and Common Sequences},
  author    = {Lee, Alan Chi-Man and
               Cheng, Wing-Sun and
               Chan, Calvin Chun-Kit},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/display_plenaries/2025.findings-acl.355/},
  pages     = {6830--6842},
  isbn      = {979-8-89176-256-5},
  abstract  = {We propose PROMTEC, a novel multi-faceted approach to accelerate the inference of large language models (LLMs) by leveraging three key techniques: Prompt Multi-Lookup, Template Datastore, and Common Sequences methods. Prompt Multi-Lookup enhances the autoregressive decoding efficiency by generating multiple candidate sequences from context. Template Datastore exploits structured patterns, particularly in mathematical and code generation tasks, to enable fast and accurate candidate generation. Common Sequences optimize inference by precomputing frequent short sequences in specialized domains. For mathematical generation, PROMTEC achieves a 3.91 $\times$ speedup on the miniF2F benchmark. For code generation, it achieves up to a 4.23 $\times$ speedup on the HumanEval benchmark. This work highlights the potential of integrated candidate generation to accelerate LLM inference while maintaining high-quality outputs.},
}
@comment{
Markdown (Informal):
[PROMTEC: Fast LLM Inference Decoding using Prompt Multi-Lookup with Template Database and Common Sequences](https://preview.aclanthology.org/display_plenaries/2025.findings-acl.355/) (Lee et al., Findings 2025)
ACL
}