@comment{
  EACL 2026 long paper, ACL Anthology ID 2026.eacl-long.351.
  The url field uses the canonical Anthology address rather than a
  temporary preview/ingest build, which is not a stable location.
  Case-sensitive words in title and booktitle are protected with
  whole-word braces so sentence-casing styles leave them intact.
  NOTE(review): the editor surname "Marquez" is recorded without an
  accent; confirm the spelling against the proceedings front matter.
}
@inproceedings{kassem-etal-2026-robust,
    title     = {How Robust Are Router-{LLMs}? Analysis of the Fragility of {LLM} Routing Capabilities},
    author    = {Kassem, Aly M. and
                 Sch{\"o}lkopf, Bernhard and
                 Jin, Zhijing},
    editor    = {Demberg, Vera and
                 Inui, Kentaro and
                 Marquez, Llu{\'i}s},
    booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.eacl-long.351/},
    pages     = {7496--7507},
    isbn      = {979-8-89176-380-7},
    abstract  = {Large language model (LLM) routing has emerged as a crucial strategy for balancing computational costs with performance by dynamically assigning queries to the most appropriate model based on query complexity. Despite recent advances showing that preference-data-based routers can outperform traditional methods, current evaluation benchmarks remain limited{---}they largely focus on general model capabilities while overlooking task-specific behaviors and critical concerns such as privacy, safety, and potential backdoor vulnerabilities introduced through preference data. In response, we propose the DSC benchmark: Diverse, simple, and categorized, an evaluation framework that categorizes router performance across a broad spectrum of query types{---}including coding, translation, mathematics, human instructions, general knowledge, and LLM jailbreaking{---}and integrates privacy and safety assessments to reveal hidden risks. Our experiments on three preference-based routers and two commercial counterparts demonstrate that while these systems improve efficiency, they often make suboptimal, category-driven decisions; for instance, a BERT-based router directs all coding and mathematics queries to the most powerful LLM{---}even when simpler models would suffice{---}while routing jailbreaking attempts to weaker models, thereby elevating safety risks.},
}
Markdown (Informal)
[How Robust Are Router-LLMs? Analysis of the Fragility of LLM Routing Capabilities](https://aclanthology.org/2026.eacl-long.351/) (Kassem et al., EACL 2026)
ACL