% Tutorial abstract in the EMNLP 2025 "Tutorial Abstracts" proceedings volume.
% NOTE(review): `url` uses the canonical ACL Anthology host instead of the
% temporary preview/ingest build; this assumes the Anthology ID
% 2025.emnlp-tutorials.1 is unchanged after ingestion -- confirm the link resolves.
% NOTE(review): `address` holds the conference venue (ACL Anthology export
% convention), not the publisher's city.
@inproceedings{ning-etal-2025-efficient,
  author    = {Ning, Xuefei and Dai, Guohao and Bai, Haoli and Hou, Lu and Wang, Yu},
  title     = {Efficient Inference for Large Language Models {--} Algorithm, Model, and System},
  editor    = {Pyatkin, Valentina and Vlachos, Andreas},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {1--3},
  isbn      = {979-8-89176-336-4},
  url       = {https://aclanthology.org/2025.emnlp-tutorials.1/},
}