@inproceedings{toles-etal-2025-learning,
  title         = {Learning and Evaluating Factual Clarification Question Generation Without Examples},
  author        = {Toles, Matthew and
                   Huang, Yukun and
                   Yu, Zhou},
  editor        = {Dhole, Kaustubh and
                   Clinciu, Miruna},
  booktitle     = {Proceedings of the Fourth Workshop on Generation, Evaluation and Metrics ({GEM}{\texttwosuperior})},
  month         = jul,
  year          = {2025},
  address       = {Vienna, Austria and virtual meeting},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2025.gem-1.15/},
  pages         = {200--211},
  isbn          = {979-8-89176-261-9},
  abstract      = {Real-world tasks such as giving legal or technical advice often depend on context that is initially missing at the outset. The ability to derive missing factual information by asking clarifying questions (ACQ) is an important element of real-life collaboration on such reasoning tasks. Although intent disambiguation has been heavily investigated, factual reasoning remains underexplored. To enable evaluation of factual domain clarification question generation, we present a new task that focuses on the ability to elicit missing information in multi-hop reasoning tasks. We observe that humans outperform GPT-4o by a large margin, while Llama 3 8B Instruct does not even beat the dummy baseline in some metrics. Finally, we find that by fine-tuning Llama 3 8B Instruct on its own generations filtered via rejection sampling, we can improve information recovery by 27.6{\%} without using any manually labeled data.},
  internal-note = {NOTE(review): url normalized from the temporary corrections-preview host (preview.aclanthology.org/corrections-2025-08/2025.gem-1.15/) to the stable canonical Anthology URL for the same paper ID; field delimiters switched from quotes to braces and ISBN lowercased for file-internal consistency. Metadata otherwise unchanged.},
}
@comment{Web-page paste residue from the ACL Anthology entry page, kept for reference:
Markdown (Informal)
[Learning and Evaluating Factual Clarification Question Generation Without Examples](https://aclanthology.org/2025.gem-1.15/) (Toles et al., GEM 2025)
ACL
}