% Zhang et al., "Evaluation Agent" -- ACL 2025 (Volume 1: Long Papers), pp. 7561--7582.
% The url field holds the permanent ACL Anthology address; preview/ingestion
% staging links are temporary and must not be stored here.
% The framework name in the title is brace-protected so sentence-casing
% bibliography styles keep its capitals.
% NOTE(review): ACL Anthology DOIs normally follow 10.18653/v1/<anthology-id>;
% confirm and add a doi field once it is registered.
@inproceedings{zhang-etal-2025-evaluation,
  title     = {{Evaluation Agent}: Efficient and Promptable Evaluation Framework for Visual Generative Models},
  author    = {Zhang, Fan and
               Tian, Shulin and
               Huang, Ziqi and
               Qiao, Yu and
               Liu, Ziwei},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.374/},
  pages     = {7561--7582},
  isbn      = {979-8-89176-251-0},
  abstract  = {Recent advancements in visual generative models have enabled high-quality image and video generation, opening diverse applications. However, evaluating these models often demands sampling hundreds or thousands of images or videos, making the process computationally expensive, especially for diffusion-based models with inherently slow sampling. Moreover, existing evaluation methods rely on rigid pipelines that overlook specific user needs and provide numerical results without clear explanations. In contrast, humans can quickly form impressions of a model's capabilities by observing only a few samples. To mimic this, we propose the Evaluation Agent framework, which employs human-like strategies for efficient, dynamic, multi-round evaluations using only a few samples per round, while offering detailed, user-tailored analyses. It offers four key advantages: 1) efficiency, 2) promptable evaluation tailored to diverse user needs, 3) explainability beyond single numerical scores, and 4) scalability across various models and tools. Experiments show that Evaluation Agent reduces evaluation time to 10\% of traditional methods while delivering comparable results. The Evaluation Agent framework is fully open-sourced to advance research in visual generative models and their efficient evaluation.},
}
Markdown (Informal)
[Evaluation Agent: Efficient and Promptable Evaluation Framework for Visual Generative Models](https://aclanthology.org/2025.acl-long.374/) (Zhang et al., ACL 2025)
ACL