@comment{
  Conference paper entry for the LoFa logical-fallacy robustness benchmark.
  Review notes:
  - The second author's given name was lowercase ("li") in the imported
    record and is capitalised here; confirm against the published paper.
  - The url points at a preview/ingest host; presumably it should be swapped
    for the permanent address (and a doi added) once available. TODO confirm.
  - Markdown bold markers were removed from the abstract because BibTeX
    would print them as literal asterisks.
}
@inproceedings{shen-etal-2026-truth,
    title = "Truth or Sophistry? {LoFa}: A Benchmark for {LLM} Robustness Against Logical Fallacies",
    author = "Shen, Xudong and
      Yuan, Li and
      Chen, Ye and
      Wu, Xin and
      Cai, Yi and
      Wu, Zhiyong",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl/2026.acl-long.1112/",
    pages = "24236--24268",
    isbn = "979-8-89176-390-6",
    abstract = "While Large Language Models (LLMs) exhibit strong semantic capabilities, their resilience to manipulative linguistic patterns such as logical fallacies remains an underexplored area. Prior work has focused on the ability of LLMs to identify or classify fallacies, but their robustness against these fallacies in persuasive contexts remains largely unexplored. To address this gap, we introduce LoFa (Logical Fallacy), a comprehensive benchmark to evaluate LLM robustness against fallacies. We first construct the LoFa dataset via a multi-agent pipeline, pairing factual questions with fallacious arguments. Then, we develop a multi-round debate framework to assess model resilience under sustained attacks. Furthermore, to disentangle robustness from a model{'}s inherent knowledge limitations, we propose a new metric, LFR@k (Logical Fallacy Resistance), to quantify performance. Our experiments reveal that different LLMs exhibit varied robustness to distinct types of fallacies, highlighting unique vulnerability profiles across models."
}
Markdown (Informal)
[Truth or Sophistry? LoFa: A Benchmark for LLM Robustness Against Logical Fallacies](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1112/) (Shen et al., ACL 2026)
ACL