@inproceedings{xue-etal-2025-rankllm,
title = "{R}ank{LLM}: A Multi-Criteria Decision-Making Method for {LLM} Performance Evaluation in Sentiment Analysis",
author = "Xue, Huzhi and
Zhao, Butian and
Xie, Haihua and
Sun, Zeyu",
editor = "Sun, Maosong and
Duan, Peiyong and
Liu, Zhiyuan and
Xu, Ruifeng and
Sun, Weiwei",
booktitle = "Proceedings of the 24th {C}hina National Conference on Computational Linguistics ({CCL} 2025)",
month = aug,
year = "2025",
address = "Jinan, China",
publisher = "Chinese Information Processing Society of China",
url = "https://preview.aclanthology.org/ingest-ccl/2025.ccl-1.62/",
pages = "818--830",
    abstract = "Large Language Models (LLMs) have made significant advancements in sentiment analysis, yet their quality and reliability vary widely. Existing LLM evaluation studies are limited in scope, lack a comprehensive framework for integrating diverse capabilities, and fail to quantify the impact of prompt design on performance. To address these gaps, this paper introduces a set of LLM evaluation criteria with detailed explanations and mathematical formulations, aiding users in understanding LLM limitations and selecting the most suitable model for sentiment analysis. Using these criteria, we apply the Technique for Order Preference by Similarity to an Ideal Solution (TOPSIS), a classic decision-making method, to rank the performance of LLMs in sentiment analysis. We evaluate six popular LLMs on three Twitter datasets covering different topics and analyze the impact of prompt design by assessing model-prompt combinations. Additionally, a validation experiment on a publicly available annotated dataset further confirms our ranking results. Finally, our findings offer valuable insights into the evaluation and selection of LLMs for sentiment analysis."
}