% Survey paper in the INLG 2025 proceedings (ACL Anthology ID 2025.inlg-main.32).
% The url field uses the canonical Anthology address rather than a temporary
% preview/ingestion branch, which would stop resolving once that branch is removed.
% Non-ASCII names are written in UTF-8 throughout (no mixed LaTeX accent escapes);
% process this file with a UTF-8-aware toolchain such as Biber.
@inproceedings{zhen-etal-2025-taming,
    title     = "Taming the Titans: A Survey of Efficient {LLM} Inference Serving",
    author    = "Zhen, Ranran and
                 Li, Juntao and
                 Ji, Yixin and
                 Yang, Zhenlin and
                 Liu, Tong and
                 Xia, Qingrong and
                 Duan, Xinyu and
                 Wang, Zhefeng and
                 Huai, Baoxing and
                 Zhang, Min",
    editor    = "Flek, Lucie and
                 Narayan, Shashi and
                 Phương, Lê Hồng and
                 Pei, Jiahuan",
    booktitle = "Proceedings of the 18th International Natural Language Generation Conference",
    month     = oct,
    year      = "2025",
    address   = "Hanoi, Vietnam",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.inlg-main.32/",
    pages     = "522--541",
}