% Conference paper: EACL 2026, Volume 4 (Student Research Workshop), pp. 304--319.
% NOTE(review): the url field points at a preview.aclanthology.org ingest path -- confirm it is the permanent link before citing.
@inproceedings{varangot-reille-etal-2026-generalising,
  title     = {Generalising {LLM} Routing using Past Performance Retrieval: A Few-Shot Router is Sufficient},
  author    = {Varangot-Reille, Clovis and Bouvard, Christophe and Gourru, Antoine},
  editor    = {Baez Santamaria, Selene and Somayajula, Sai Ashish and Yamaguchi, Atsuki},
  booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 4: Student Research Workshop)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.22/},
  pages     = {304--319},
  isbn      = {979-8-89176-383-8},
  abstract  = {We study model routing for Large Language Model (LLM)-based systems. A model, called the router, dynamically chooses which LLM should handle a given input/query. We challenge the assumption that complex routers are necessary for generalising to new candidate LLMs. We introduce ContextualRouter, a simple meta-evaluation framework that predicts per-model performance for new queries by retrieving similar past queries and reweighting model scores with lightweight attention. During inference, the router balances estimated performance and cost by adjusting a tunable cost penalty parameter. This allows the router to adapt dynamically to the addition or removal of LLMs without the need for retraining. Across five routing benchmarks (SPROUT, RouterBench, LiveBench, BigGenBench, and EmbedLLM), ContextualRouter matches the quality{--}cost trade-offs of other generalisable routers. Surprisingly, a simpler non-parametric baseline, $k$-nearest-neighbour averaging, performs comparably or better, achieving strong performance estimation, high NDCG, and substantial cost savings. Retrieval-based routers remain robust to $k$, embedding size, data sparsity, retrieval degradation, and generalise to unseen queries and models with as little as 1{\%} historical data. These results suggest that effective retrieval alone enables generalisable LLM routing.}
}
Markdown (Informal)
[Generalising LLM Routing using Past Performance Retrieval: A Few-Shot Router is Sufficient](https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.22/) (Varangot-Reille et al., EACL 2026)
ACL