% ACL 2025 long paper, Anthology ID 2025.acl-long.1283.
% The url field uses the canonical aclanthology.org address, not a preview/staging build.
% address holds the conference location, following the ACL Anthology export convention.
@inproceedings{zhou-etal-2025-efficiency,
  author    = {Zhou, Huixue and
               Gu, Hengrui and
               Zhan, Zaifu and
               Liu, Xi and
               Zhou, Kaixiong and
               Xiao, Yongkang and
               Liang, Mingfu and
               Govindan, Srinivas Prasad and
               Chawla, Piyush and
               Yang, Jiyan and
               Meng, Xiangfei and
               Li, Huayu and
               Zhang, Buyun and
               Luo, Liang and
               Chen, Wen-Yen and
               Han, Yiping and
               Long, Bo and
               Zhang, Rui and
               Chen, Tianlong},
  title     = {The Efficiency vs. Accuracy Trade-off: Optimizing {RAG}-Enhanced {LLM} Recommender Systems Using Multi-Head Early Exit},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {26443--26458},
  isbn      = {979-8-89176-251-0},
  url       = {https://aclanthology.org/2025.acl-long.1283/},
}