@inproceedings{shukla-etal-2025-recon,
    title = "Recon, Answer, Verify: Agents in Search of Truth",
    author = "Shukla, Satyam and
      Dutta, Himanshu and
      Bhattacharyya, Pushpak",
    editor = "Potdar, Saloni and
      Rojas-Barahona, Lina and
      Montella, Sebastien",
    booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.emnlp-industry.167/",
    doi = "10.18653/v1/2025.emnlp-industry.167",
    pages = "2429--2448",
    isbn = "979-8-89176-333-3",
    abstract = "Human fact-checking is too slow to meet current demands, making automatic fact-checking system an essential alternative. Evaluating such systems is challenging as existing benchmark datasets either suffer from leakage or evidence incompleteness. This limits the realism of current evaluations. We present $\textbf{Politi-Fact-Only (PFO)}$, a 5-class benchmark dataset of 2,982 political claims from politifact.com, where all post-claim analysis and annotator cues have been removed manually from evidence article. After filtration, evidence contains information available prior to the claim{'}s verification. By evaluating PFO, we see an average performance drop of $\textbf{11.39\%}$ in terms of macro-f1 compared to PFO{'}s unfiltered version. Based on the identified challenges of the existing LLM-based fact-checking system, we propose $\textbf{RAV (Recon-Answer-Verify)}$, an agentic framework with three agents, it iteratively generates and answers sub-questions to verify different aspects of the claim before finally generating the label. Unlike prior literature, we worked on reducing the follow-up question complexity by leveraging two types of structured questions, which either validate a fact or inquire about a fact. RAV generalizes across both domains and label granularities, outperforming state-of-the-art methods by $\textbf{57.5\%}$ on PFO $\textit{(political, 5-class)}$ and by $\textbf{3.05\%}$ on the widely used HOVER dataset $\textit{(encyclopedic, 2-class)}$.",
}

Markdown (Informal)
[Recon, Answer, Verify: Agents in Search of Truth](https://aclanthology.org/2025.emnlp-industry.167/) (Shukla et al., EMNLP 2025)
ACL
- Satyam Shukla, Himanshu Dutta, and Pushpak Bhattacharyya. 2025. Recon, Answer, Verify: Agents in Search of Truth. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 2429–2448, Suzhou, China. Association for Computational Linguistics.