@inproceedings{yu-2025-towards,
    title = "Towards Comprehensive Evaluation of Open-Source Language Models: A Multi-Dimensional, User-Driven Approach",
    author = "Yu, Qingchen",
    editor = "Dhole, Kaustubh and Clinciu, Miruna",
    booktitle = "Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics (GEM{\texttwosuperior})",
    month = jul,
    year = "2025",
    address = "Vienna, Austria and virtual meeting",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/corrections-2025-08/2025.gem-1.1/",
    pages = "1--7",
    ISBN = "979-8-89176-261-9",
    abstract = "With rapid advancements in large language models (LLMs) across artificial intelligence, machine learning, and data science, there is a growing need for evaluation frameworks that go beyond traditional performance metrics. Conventional methods focus mainly on accuracy and computational metrics, often neglecting user experience and community interaction{---}key elements in open-source environments. This paper introduces a multi-dimensional, user-centered evaluation framework, integrating metrics like User Engagement Index (UEI), Community Response Rate (CRR), and a Time Weight Factor (TWF) to assess LLMs' real-world impact. Additionally, we propose an adaptive weighting mechanism using Bayesian optimization to dynamically adjust metric weights for more accurate model evaluation. Experimental results confirm that our framework effectively identifies models with strong user engagement and community support, offering a balanced, data-driven approach to open-source LLM evaluation. This framework serves as a valuable tool for developers and researchers in selecting and improving open-source models."
}
Markdown (Informal)
[Towards Comprehensive Evaluation of Open-Source Language Models: A Multi-Dimensional, User-Driven Approach](https://preview.aclanthology.org/corrections-2025-08/2025.gem-1.1/) (Yu, GEM 2025)
ACL