% RecStream (Guo et al.), COLING 2025 Industry Track, pp. 817--826.
% Editor surname "Di Eugenio" is entered in "Last, First" form so it is not split.
@inproceedings{guo-etal-2025-recstream,
    title     = {{RecStream}: Graph-aware Stream Management for Concurrent Recommendation Model Online Serving},
    author    = {Guo, Shuxi and
                 Qi, Qi and
                 Sun, Haifeng and
                 Liao, Jianxin and
                 Wang, Jingyu},
    editor    = {Rambow, Owen and
                 Wanner, Leo and
                 Apidianaki, Marianna and
                 Al-Khalifa, Hend and
                 Di Eugenio, Barbara and
                 Schockaert, Steven and
                 Darwish, Kareem and
                 Agarwal, Apoorv},
    booktitle = {Proceedings of the 31st International Conference on Computational Linguistics: Industry Track},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.coling-industry.68/},
    pages     = {817--826},
    abstract  = {Recommendation Models (RMs) are crucial for predicting user preferences and enhancing personalized experiences on large-scale platforms. As the application of recommendation models grows, optimizing their online serving performance has become a significant challenge. However, current serving systems perform poorly under highly concurrent scenarios. To address this, we introduce RecStream, a system designed to optimize stream configurations based on model characteristics for handling high concurrency requests. We employ a hybrid Graph Neural Network architecture to determine the best configurations for various RMs. Experimental results demonstrate that RecStream achieves significant performance improvements, reducing latency by up to 74{\%}.},
}
Markdown (Informal)
[RecStream: Graph-aware Stream Management for Concurrent Recommendation Model Online Serving](https://aclanthology.org/2025.coling-industry.68/) (Guo et al., COLING 2025)
ACL