@inproceedings{shallouf-etal-2025-compuge,
  title     = {{CompUGE-Bench}: Comparative Understanding and Generation Evaluation Benchmark for Comparative Question Answering},
  author    = {Shallouf, Ahmad and
               Nikishina, Irina and
               Biemann, Chris},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven and
               Mather, Brodie and
               Dras, Mark},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics: System Demonstrations},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-demos.19/},
  pages     = {189--198},
  abstract  = {This paper presents CompUGE, a comprehensive benchmark designed to evaluate Comparative Question Answering (CompQA) systems. The benchmark is structured around four core tasks: Comparative Question Identification, Object and Aspect Identification, Stance Classification, and Answer Generation. It unifies multiple datasets and provides a robust evaluation platform to compare various models across these sub-tasks. We also create additional all-encompassing CompUGE datasets by filtering and merging the existing ones. The benchmark for comparative question answering sub-tasks is designed as a web application available on HuggingFace Spaces: https://huggingface.co/spaces/uhhlt/CompUGE-Bench}
}
@comment{Leftover ACL Anthology page text, kept for reference:
Markdown (Informal)
[CompUGE-Bench: Comparative Understanding and Generation Evaluation Benchmark for Comparative Question Answering](https://aclanthology.org/2025.coling-demos.19/) (Shallouf et al., COLING 2025)
ACL
}