% Conference paper record for the CLEV LLM-as-judge voting framework (free-form QA evaluation).
% NOTE(review): the url field points at a preview/ingest host, which looks like a staging
%   address; confirm the canonical URL (or a DOI) before relying on it.
% NOTE(review): the url path mentions "findings" while booktitle names the main proceedings;
%   verify the venue against the published volume.
@inproceedings{badshah-etal-2025-clev,
  title     = {{CLEV}: {LLM}-Based Evaluation Through Lightweight Efficient Voting for Free-Form Question-Answering},
  author    = {Badshah, Sher and
               Moustafa, Moamen and
               Sajjad, Hassan},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.findings-ijcnlp.93/},
  pages     = {1513--1531},
  isbn      = {979-8-89176-303-6},
  abstract  = {Evaluating free-form Question-Answering (QA) remains a challenge due to its diverse and open-ended nature. Traditional automatic metrics fail to capture semantic equivalence or accommodate the variability of open-ended responses. Leveraging Large Language Models (LLMs) as evaluators offers a promising alternative due to their strong language understanding and instruction-following capabilities. We propose the Consensus via Lightweight Efficient Voting (CLEV), which employs two primary LLMs as judges and engages a third judge only in cases of disagreement. This approach prioritizes evaluation reliability while reducing unnecessary computational demands. Through experiments, including human evaluation, we demonstrate CLEV{'}s ability to provide consistent, scalable, and resource-efficient assessments, establishing it as a robust framework for evaluating LLMs on free-form QA.},
}
Markdown (Informal)
[CLEV: LLM-Based Evaluation Through Lightweight Efficient Voting for Free-Form Question-Answering](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.findings-ijcnlp.93/) (Badshah et al., Findings 2025)
ACL