@inproceedings{turkstra-etal-2025-trillama,
title = "{T}ri{LL}a{M}a at {CQ}s-Gen 2025: A Two-Stage {LLM}-Based System for Critical Question Generation",
author = "Turkstra, Frieso and
Nabhani, Sara and
Al-Khatib, Khalid",
editor = "Chistova, Elena and
Cimiano, Philipp and
Haddadan, Shohreh and
Lapesa, Gabriella and
Ruiz-Dolz, Ramon",
booktitle = "Proceedings of the 12th Argument mining Workshop",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/mtsummit-25-ingestion/2025.argmining-1.34/",
doi = "10.18653/v1/2025.argmining-1.34",
pages = "349--357",
ISBN = "979-8-89176-258-9",
abstract = "This paper presents a new system for generating critical questions in debates, developed for the Critical Questions Generation shared task. Our two-stage approach, combining generation and classification, utilizes LLaMA 3.1 Instruct models (8B, 70B, 405B) with zero-/few-shot prompting. Evaluations on annotated debate data reveal several key insights: few-shot generation with 405B yielded relatively high-quality questions, achieving a maximum possible punctuation score of 73.5. The 70B model outperformed both smaller and larger variants on the classification part. The classifiers showed a strong bias toward labeling generated questions as Useful, despite limited validation. Further, our system, ranked 6 extsuperscriptth, out-performed baselines by 3{\%}. These findings stress the effectiveness of large-sized models for question generation and medium-sized models for classification, and suggest the need for clearer task definitions within prompts to improve classification accuracy."
}