@inproceedings{el-baff-etal-2025-criticalbrew,
title = "{C}ritical{B}rew at {CQ}s-Gen 2025: Collaborative Multi-Agent Generation and Evaluation of Critical Questions for Arguments",
author = "El Baff, Roxanne and
Opitz, Dominik and
Diallo, Diaoul{\'e}",
editor = "Chistova, Elena and
Cimiano, Philipp and
Haddadan, Shohreh and
Lapesa, Gabriella and
Ruiz-Dolz, Ramon",
booktitle = "Proceedings of the 12th Argument mining Workshop",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.argmining-1.30/",
doi = "10.18653/v1/2025.argmining-1.30",
pages = "314--321",
ISBN = "979-8-89176-258-9",
abstract = "This paper presents the \textit{CriticalBrew} submission to the CQs-Gen 2025 shared task, which focuses on generating critical questions (CQs) for a given argument. Our approach employs a multi-agent framework containing two sequential components: 1) \textbf{Generation}: machine society simulation for generating CQs and 2) \textbf{Evaluation}: LLM-based evaluation for selecting the top three questions. The first models collaboration as a sequence of thinking patterns (e.g., \textit{debate} {\textrightarrow} \textit{reflect}). The second assesses the generated questions using zero-shot prompting, evaluating them against several criteria (e.g., depth). Experiments with different open-weight LLMs (small vs. large) consistently outperformed the baseline, a single LLM with zero-shot prompting. Two configurations, agent count and thinking patterns, significantly impacted the performance in the shared task{'}s CQ-usefulness evaluation, whereas different LLM-based evaluation strategies (e.g., scoring) had no impact. Our code is available on GitHub."
}
Markdown (Informal)
[CriticalBrew at CQs-Gen 2025: Collaborative Multi-Agent Generation and Evaluation of Critical Questions for Arguments](https://preview.aclanthology.org/landing_page/2025.argmining-1.30/) (El Baff et al., ArgMining 2025)
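The abstract describes a two-stage pipeline: multi-agent generation of critical questions via simulated collaboration (thinking patterns such as debate → reflect), followed by zero-shot LLM evaluation that keeps the top three questions. The sketch below is a minimal illustration of that idea, not the authors' implementation; the `llm` callable, prompt wording, thinking-pattern list, and evaluation criteria beyond "depth" are assumptions made for illustration.

```python
"""Illustrative sketch of a two-stage CQ pipeline: multi-agent generation,
then LLM-based scoring to select the top three questions. All prompts,
names, and criteria here are hypothetical, not the CriticalBrew code."""

from typing import Callable

# An LLM is abstracted as "prompt in, text out" so the sketch works with any
# backend (assumption; the paper uses open-weight LLMs of different sizes).
LLM = Callable[[str], str]

THINKING_PATTERNS = ["debate", "reflect"]        # e.g., debate -> reflect (from the abstract)
CRITERIA = ["depth", "relevance", "clarity"]     # "depth" is from the abstract; the rest are assumed


def generate_cqs(argument: str, llm: LLM, n_agents: int = 3) -> list[str]:
    """Stage 1: simulate a small 'machine society' whose agents draft critical
    questions and revise them through a sequence of thinking patterns."""
    drafts = [
        llm(f"Agent {i}: pose one critical question about this argument:\n{argument}")
        for i in range(n_agents)
    ]
    for pattern in THINKING_PATTERNS:
        drafts = [
            llm(f"Apply the '{pattern}' pattern to improve this question.\n"
                f"Argument: {argument}\nQuestion: {draft}")
            for draft in drafts
        ]
    return drafts


def select_top_three(argument: str, questions: list[str], llm: LLM) -> list[str]:
    """Stage 2: zero-shot LLM scoring against the criteria; keep the best three."""
    def score(question: str) -> float:
        total = 0.0
        for criterion in CRITERIA:
            reply = llm(f"Rate the {criterion} of this critical question for the "
                        f"argument on a 1-5 scale. Reply with a number only.\n"
                        f"Argument: {argument}\nQuestion: {question}")
            try:
                total += float(reply.strip())
            except ValueError:
                pass  # ignore unparsable ratings in this sketch
        return total

    return sorted(questions, key=score, reverse=True)[:3]
```

Usage would amount to `select_top_three(arg, generate_cqs(arg, llm), llm)` with any `llm` function wrapping a local or hosted model; agent count and the pattern sequence are the two knobs the abstract reports as most influential.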