% Conference paper record: ACL 2026 main proceedings (Volume 1: Long Papers).
% NOTE(review): the url field targets the preview.aclanthology.org ingest host;
% confirm whether it should be replaced by the permanent Anthology URL.
@inproceedings{yang-etal-2026-explain,
  title     = {Explain the Synth: Interpretable Evaluation of {LLM} Data Synthesis},
  author    = {Yang, Yue and
               Yang, Fan and
               Bai, Yu and
               Wang, Hao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1995/},
  pages     = {43054--43077},
  isbn      = {979-8-89176-390-6},
  abstract  = {Large language models (LLMs) are increasingly used to generate synthetic data, in which tabular data constitute a fundamental data modality across a wide range of domains. Yet, current evaluation practices often provide limited insights into whether the synthetic data preserve real data-generating relationships or introduce plausible-looking artifacts. We present a conceptually simple, interpretable auditing framework that compares the explanatory structure induced by real versus synthetic data. The key idea is to use a transparent rule-based model as a shared explanatory language: we extract rules from real data to summarize how features relate to labels, then examine how this rule structure changes when explained using LLM-generated data. Importantly, these rules are derived by an independent rule auditor rather than by the generator itself. The resulting ``explanation shift'' reveals which relationships are preserved, weakened, removed, or newly introduced by the generator, offering actionable diagnostics beyond aggregate fidelity scores. We further provide a theoretical perspective that links explanation shift and cross-domain predictive gaps to distribution mismatch within an interpretable hypothesis class. Overall, our approach turns synthetic data evaluation into a human-auditable comparison of explanations, improving transparency for LLM-based tabular synthesis.},
}
Markdown (Informal)
[Explain the Synth: Interpretable Evaluation of LLM Data Synthesis](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1995/) (Yang et al., ACL 2026)
ACL
- Yue Yang, Fan Yang, Yu Bai, and Hao Wang. 2026. Explain the Synth: Interpretable Evaluation of LLM Data Synthesis. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 43054–43077, San Diego, California, United States. Association for Computational Linguistics.