@inproceedings{he-etal-2026-uncovering,
  title     = "Uncovering Hidden Correctness in {LLM} Causal Reasoning via Symbolic Verification",
  author    = "He, Paul and
               Huang, Yinya and
               Sachan, Mrinmaya and
               Jin, Zhijing",
  editor    = "Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'i}s",
  booktitle = "Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)",
  month     = mar,
  year      = "2026",
  address   = "Rabat, Morocco",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2026.eacl-long.56/",
  pages     = "1231--1250",
  isbn      = "979-8-89176-380-7",
  abstract  = "Large language models (LLMs) are increasingly applied to tasks involving causal reasoning. However, current benchmarks often rely on string matching or surface-level metrics that fail to assess whether a model{'}s output is formally valid under causal semantics. We propose DoVerifier, a symbolic verification framework that checks whether LLM-generated causal expressions are derivable from a given causal graph using rules from do-calculus and probability theory. This allows us to recover correct answers that would otherwise be marked incorrect due to superficial differences. Evaluations on synthetic data and causal QA benchmarks show that DoVerifier more accurately captures semantic correctness than standard metrics, offering a more rigorous and informative way to evaluate LLMs on causal tasks."
}

@comment{
  Informal Markdown citation from the ACL Anthology export page (kept for reference,
  wrapped in @comment so no BibTeX parser mistakes it for entry data):

  [Uncovering Hidden Correctness in LLM Causal Reasoning via Symbolic Verification](https://aclanthology.org/2026.eacl-long.56/) (He et al., EACL 2026)
  ACL
}