% Conference-paper entry for the SEC-FinTables benchmark paper
% (Findings of ACL 2026, per the booktitle field below).
% NOTE(review): the url field looks like a preview/ingestion link
%   (preview.aclanthology.org/ingest-acl) -- confirm the permanent URL
%   before relying on this entry.
% NOTE(review): the address field appears to hold the conference location
%   rather than the publisher's city -- confirm against the target style.
@inproceedings{ke-etal-2026-sec,
  author    = {Ke, Shuyan and
               Wu, Qiong and
               Li, Hui and
               Cao, Liujuan},
  title     = {{SEC-FinTables}: Evaluating Large Language Models for Detecting Logical Inconsistencies on Tabular Data},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.764/},
  pages     = {15587--15607},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large language models (LLMs) are increasingly deployed in high-stakes domains reliant on tabular data (e.g., financial reporting), where undetected logical inconsistencies such as mismatched totals and components can lead to critical errors. Yet, the ability of LLMs to identify such inconsistencies remains poorly understood, hindered by the absence of standardized evaluation frameworks and cell-level annotated datasets. To bridge this gap, we propose a comprehensive benchmark SEC-Fintables comprising 103,395 real-world and error-injected table instances, alongside a novel evaluation protocol that decomposes inconsistency detection into granular sub-tasks. Through evaluating both proprietary and open-source LLMs on SEC-Fintables, we find that contemporary LLMs exhibit only partial competence in detecting logical inconsistencies. Our study reveals key limitations and improvement opportunities for LLMs. We believe SEC-Fintables and our evaluation protocol can serve as a practical resource for advancing reliable inconsistency detection of LLMs in tabular reasoning. We release SEC-Fintables at https://github.com/XIEFOX/SEC-Fintables.},
}
Markdown (Informal)
[SEC-FinTables: Evaluating Large Language Models for Detecting Logical Inconsistencies on Tabular Data](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.764/) (Ke et al., Findings 2026)
ACL