% Conference paper in the Findings of ACL 2026 volume (pages 16403--16429).
% NOTE(review): the url field points at a preview.aclanthology.org ingest host;
% presumably this should become the canonical Anthology URL once one exists -- confirm.
% Case-sensitive words in title/booktitle are protected with whole-word braces.
@inproceedings{li-etal-2026-causalitycheck,
  title     = {{CausalityCheck}: A Framework for Evaluating Causal Reasoning in Large Language Models},
  author    = {Li, Jiang and
               Duo, Zehua and
               Gao, Guanglai and
               Su, Xiangdong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.808/},
  pages     = {16403--16429},
  isbn      = {979-8-89176-395-1},
  abstract  = {Causal reasoning is a crucial component of understanding complex phenomena and building intelligent systems. Recent advancements in large language models (LLMs) have demonstrated their strong capabilities in reasoning tasks; however, their true understanding of causal relationships remains limited, particularly in cases where causal chains are misidentified or reliance on empirical inference occurs. To mitigate the risk that models misclassify data as false positives due to these issues, we introduce CausalityCheck, an automated tool designed to efficiently generate causal reasoning checklists. This checklist enables the creation of multi-task causal reasoning datasets with task generalization and reasoning robustness from a single causal reasoning dataset. Using CausalityCheck, we developed CausalityCheck-CP to assess the causal reasoning abilities of 18 LLMs. This framework also measures the extent to which causal chains are misidentified or rely on empirical inferences. Our results indicate that the current large language models still face two critical issues when handling complex causal reasoning tasks: incorrect identification of causal chains and reliance on empirical inference. The code and data are available at https://github.com/dzh597/CausalityCheck.},
}
Markdown (Informal)
[CausalityCheck: A Framework for Evaluating Causal Reasoning in Large Language Models](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.808/) (Li et al., Findings 2026)
ACL