@inproceedings{chen-etal-2025-augment,
  title     = {When and How to Augment Your Input: Question Routing Helps Balance the Accuracy and Efficiency of Large Language Models},
  author    = {Chen, Shufan and Zheng, He and Cui, Lei},
  editor    = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: {NAACL} 2025},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-naacl.200/},
  pages     = {3621--3634},
  isbn      = {979-8-89176-195-7},
  abstract  = {Although large language models rely on parametric knowledge to achieve exceptional performance across various question-answering tasks, they still face challenges when addressing knowledge-based long-tail questions. Augmented generation techniques, such as chain-of-thought prompting and retrieval augmentation, can effectively enhance the ability of these models to answer long-tail questions. However, improving accuracy through augmented generation often results in significant latency within question-answering systems. This paper addresses the issue of ``when and how to augment the input'' by proposing an adaptive question routing framework. This framework employs a query router to select the most appropriate augmentation path at the right time, thereby enhancing both the accuracy and efficiency of question-answering systems. Extensive comparative experiments on benchmarks such as AmbigNQ, HotpotQA, MMLU-STEM, and PopQA demonstrate that our method surpasses existing approaches in both accuracy and efficiency. Furthermore, this paper introduces two metrics for evaluating adaptive question augmentation methods and presents a new benchmark for adaptive question augmentation, aiming to advance the field.},
}
Markdown (Informal)
[When and How to Augment Your Input: Question Routing Helps Balance the Accuracy and Efficiency of Large Language Models](https://aclanthology.org/2025.findings-naacl.200/) (Chen et al., Findings 2025)
ACL