% ACL 2025 (Volume 1: Long Papers) conference paper, ACL Anthology ID 2025.acl-long.1283.
% The url field uses the canonical aclanthology.org address rather than a
% preview.aclanthology.org staging link, which is not a stable location.
% NOTE(review): no DOI is recorded for this entry; add a doi field once confirmed.
@inproceedings{zhou-etal-2025-efficiency,
    title     = {The Efficiency vs. Accuracy Trade-off: Optimizing {RAG}-Enhanced {LLM} Recommender Systems Using Multi-Head Early Exit},
    author    = {Zhou, Huixue and
                 Gu, Hengrui and
                 Zhan, Zaifu and
                 Liu, Xi and
                 Zhou, Kaixiong and
                 Xiao, Yongkang and
                 Liang, Mingfu and
                 Govindan, Srinivas Prasad and
                 Chawla, Piyush and
                 Yang, Jiyan and
                 Meng, Xiangfei and
                 Li, Huayu and
                 Zhang, Buyun and
                 Luo, Liang and
                 Chen, Wen-Yen and
                 Han, Yiping and
                 Long, Bo and
                 Zhang, Rui and
                 Chen, Tianlong},
    editor    = {Che, Wanxiang and
                 Nabende, Joyce and
                 Shutova, Ekaterina and
                 Pilehvar, Mohammad Taher},
    booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.acl-long.1283/},
    pages     = {26443--26458},
    isbn      = {979-8-89176-251-0},
}