% Conference-paper record for "Rethinking Scale" (Wang, 2026).
% NOTE(review): the url field points at the preview.aclanthology.org ingest
% build; presumably it should be replaced by the canonical Anthology URL once
% the volume is published -- confirm before citing.
@inproceedings{wang-2026-rethinking,
  title     = {Rethinking Scale: Deployment Trade-offs of Small Language Models under Agent Paradigms},
  author    = {Wang, Xinlin},
  editor    = {Li, Yunyao and Rehm, Georg and Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-industry.123/},
  pages     = {1795--1807},
  isbn      = {979-8-89176-394-4},
  abstract  = {Despite the impressive capabilities of large language models, their substantial computational costs, latency, and privacy risks hinder their widespread deployment in real-world applications. Small Language Models (SLMs) with fewer than 10 billion parameters present a promising alternative; however, their inherent limitations in knowledge and reasoning curtail their effectiveness. Existing research primarily focuses on enhancing SLMs through scaling laws or fine-tuning strategies while overlooking the potential of using agent paradigms, such as tool use and multi-agent collaboration, to systematically compensate for the inherent weaknesses of small models. To address this gap, this paper presents the first large-scale, comprehensive study of {\ensuremath{<}}10B open-source models under three paradigms: (1) the base model, (2) a single agent equipped with tools, and (3) a routing-based multi-agent system with collaborative capabilities. Our results show that structured agent frameworks (combining step-by-step reasoning and tool use) substantially improve effectiveness over direct prompting, with single-agent systems achieving the best balance between performance and cost. In contrast, routing-based multi-agent setups introduce additional coordination overhead with limited gains under small-model constraints. Our findings highlight the importance of agent-centric design for efficient and trustworthy deployment in resource-constrained settings.},
}
Markdown (Informal)
[Rethinking Scale: Deployment Trade-offs of Small Language Models under Agent Paradigms](https://preview.aclanthology.org/ingest-acl/2026.acl-industry.123/) (Wang, ACL 2026)
ACL