% Conference paper from the EMNLP 2025 Industry Track proceedings.
% - url uses the permanent aclanthology.org host rather than the temporary
%   "preview.aclanthology.org/ingest-emnlp" staging path.
% - address holds the location as "City, Country".
% - NOTE(review): no DOI is recorded for this entry; add a bare `doi` field
%   if/when one is registered -- TODO confirm.
@inproceedings{behdin-etal-2025-scaling,
    title     = {Scaling Down, Serving Fast: Compressing and Deploying Efficient {LLMs} for Recommendation Systems},
    author    = {Behdin, Kayhan and
                 Fatahibaarzi, Ata and
                 Song, Qingquan and
                 Dai, Yun and
                 Gupta, Aman and
                 Wang, Zhipeng and
                 Sang, Hejian and
                 Tang, Shao and
                 Dexter, Gregory and
                 Zhu, Sirou and
                 Zhu, Siyu and
                 Dharamsi, Tejas and
                 Kothapalli, Vignesh and
                 Fu, Zhoutong and
                 Cao, Yihan and
                 Hsu, Pin-Lun and
                 Borisyuk, Fedor and
                 Pillai, Natesh S. and
                 Simon, Luke and
                 Mazumder, Rahul},
    editor    = {Potdar, Saloni and
                 Rojas-Barahona, Lina and
                 Montella, Sebastien},
    booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.emnlp-industry.119/},
    pages     = {1687--1702},
    isbn      = {979-8-89176-333-3},
}