@inproceedings{xu-etal-2026-arclight,
  title     = {{ArcLight}: A Lightweight {LLM} Inference Architecture for Many-Core {CPUs}},
  author    = {Xu, Yuzhuang and
               Han, Xu and
               Li, Yuxuan and
               Che, Wanxiang},
  editor    = {Durrett, Greg and
               Jian, Ping},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 3: System Demonstrations)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-demo.18/},
  pages     = {178--186},
  isbn      = {979-8-89176-392-0},
  abstract  = {Although existing frameworks for large language model (LLM) inference on CPUs are mature, they fail to fully exploit the computational potential of many-core CPU platforms. Many-core CPUs are widely deployed in web servers and high-end networking devices, and are typically organized into multiple NUMA nodes that group cores and memory. Current frameworks largely overlook the substantial overhead of cross-NUMA memory access, limiting inference scalability and intelligence enabling on such platforms. To address this limitation, we build ArcLight, a lightweight LLM inference architecture designed from the ground up for many-core CPUs. ArcLight integrates efficient memory management and thread scheduling, and introduces finely controlled tensor parallelism to mitigate the cross-node memory access wall. Experimental results show that ArcLight significantly surpasses the performance ceiling of mainstream frameworks, achieving up to 46{\%} higher inference throughput. Moreover, ArcLight maintains compatibility with arbitrary CPU devices. ArcLight is publicly available at https://github.com/OpenBMB/ArcLight.},
}
Markdown (Informal)
[ArcLight: A Lightweight LLM Inference Architecture for Many-Core CPUs](https://preview.aclanthology.org/ingest-acl/2026.acl-demo.18/) (Xu et al., ACL 2026)
ACL
- Yuzhuang Xu, Xu Han, Yuxuan Li, and Wanxiang Che. 2026. ArcLight: A Lightweight LLM Inference Architecture for Many-Core CPUs. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 178–186, San Diego, California, United States. Association for Computational Linguistics.