% Conference paper (ACL 2026 proceedings): Singh, counterfactually augmented data for logical fallacy detection.
@inproceedings{singh-2026-debiasing,
  author    = {Singh, Navyansh},
  title     = {Debiasing Logical Fallacy Detection for Real-World Robustness via Counterfactually Augmented Data},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-srw.30/},
  pages     = {363--374},
  isbn      = {979-8-89176-393-7},
  abstract  = {Logical fallacy detection models frequently over-flag valid reasoning due to reliance on surface-level spurious correlations. We introduce 703 LLM-generated Counterfactually Augmented Data (CAD) pairs{---}minimally differentiated valid and fallacious arguments{---}to debias models through targeted augmentation. Fine-tuning DeBERTa-v3-large on CoCoLoFa augmented with these pairs yields marginal in-distribution improvement (+0.4{\%} F1) but substantial out-of-distribution robustness: 58{\%} relative reduction in false positive rate (64{\%} {\textrightarrow} 26.7{\%}) on a 300-sample Reddit-sourced evaluation set. While recent LLMs (Llama-3.1-8B, Llama-3.3-70B) achieve high performance under optimized prompts (F1 90{--}94{\%}), they degrade severely under simple human-like prompts (F1 63{--}72{\%}, FPR 54{--}74{\%}). Our lightweight, prompt-invariant approach achieves competitive robustness (F1 85.9{\%}, FPR 26.7{\%}) across all prompting regimes without prompt engineering, making it stable for production deployment with unpredictable user input. The dataset and model are publicly released.},
}
Markdown (Informal)
[Debiasing Logical Fallacy Detection for Real-World Robustness via Counterfactually Augmented Data](https://preview.aclanthology.org/ingest-acl/2026.acl-srw.30/) (Singh, ACL 2026)
ACL