@inproceedings{miao-fang-2025-user,
title = "User-side Model Consistency Monitoring for Open Source Large Language Models Inference Services",
author = "Miao, Qijun and
Fang, Zhixuan",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.569/",
pages = "11610--11622",
ISBN = "979-8-89176-251-0",
abstract = "With the continuous advancement in the performance of open-source large language models (LLMs), their inference services have attracted a substantial user base by offering quality comparable to closed-source models at a significantly lower cost. However, it has also given rise to trust issues regarding model consistency between users and third-party service providers. Specifically, service providers can effortlessly degrade a model{'}s parameter scale or precision for more margin profits, and although users may perceptibly experience differences in text quality, they often lack a reliable method for concrete monitoring. To address this problem, we propose a paradigm for model consistency monitoring on the user side. It constructs metrics based on the logits produced by LLMs to differentiate sequences generated by degraded models. Furthermore, by leveraging model offloading techniques, we demonstrate that the proposed method is implementable on consumer-grade devices. Metric evaluations conducted on three widely used LLMs series (OPT, Llama 3.1 and Qwen 2.5) along with system prototype efficiency tests on a consumer device (RTX 3080 TI) confirm both the effectiveness and feasibility of the proposed approach."
}
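
The abstract sketches the approach at a high level: score service-returned text with a locally run reference copy of the claimed model, derive metrics from its logits to flag degraded substitutes, and use model offloading so the reference model fits on consumer hardware. The snippet below is a minimal illustrative sketch of that idea, not the paper's actual metric: it assumes Hugging Face transformers with accelerate-style layer offloading (device_map="auto"), and the model ID, scoring function, and decision threshold are hypothetical choices for demonstration only.

# Illustrative sketch only -- NOT the metric from Miao & Fang (2025).
# Idea: load a reference copy of the claimed model locally (offloading
# layers so it fits a consumer GPU), compute the log-probability the
# reference model assigns to the service's completion, and flag outputs
# the reference model finds unusually unlikely.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Llama-3.1-8B"  # hypothetical pick from the evaluated series

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",          # accelerate offloads layers to CPU/disk as needed
    torch_dtype=torch.float16,  # halves memory, e.g. for a 12 GB RTX 3080 Ti
)
model.eval()

@torch.no_grad()
def mean_token_logprob(prompt: str, completion: str) -> float:
    """Average log-prob the local reference model assigns to the
    service-returned completion tokens, conditioned on the prompt."""
    prompt_len = tokenizer(prompt, return_tensors="pt").input_ids.shape[1]
    ids = tokenizer(prompt + completion, return_tensors="pt").input_ids.to(model.device)
    logits = model(ids).logits                        # (1, seq_len, vocab)
    logprobs = torch.log_softmax(logits[:, :-1], -1)  # position i predicts token i+1
    token_lp = logprobs.gather(-1, ids[:, 1:, None]).squeeze(-1)
    # Score only the completion part (prompt tokenization assumed to align
    # with the prefix of the joint tokenization).
    return token_lp[0, prompt_len - 1:].mean().item()

# A smaller or lower-precision substitute tends to produce text the full
# model scores as less likely; the -3.5 threshold is a made-up placeholder
# that would need calibration against genuine outputs in practice.
score = mean_token_logprob("Q: What is entropy?\nA:", " A measure of disorder.")
print(f"mean log-prob: {score:.3f} | degraded? {score < -3.5}")

One design note: scoring requires only a forward pass over the returned tokens, not generation, which is what makes offloaded execution on a consumer device tolerable despite its low throughput.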