% Trigg, Hougen, and Bilal (2026): CoNLL proceedings paper on logic-level evaluation of logical table-to-text generation.
@inproceedings{trigg-etal-2026-logic,
  title     = {Logic-Level Evaluation of Logical Table-to-Text Generation},
  author    = {Trigg, Lena and
               Hougen, Dean F. and
               Bilal, Ahsan},
  editor    = {Bonial, Claire and
               Berzak, Yevgeni},
  booktitle = {Proceedings of the 30th Conference on Computational Natural Language Learning},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.41/},
  pages     = {677--691},
  isbn      = {979-8-89176-410-1},
  abstract  = {Logical Table-to-Text (LT2T) generation aims to produce natural-language sentences that are logically faithful to structured tabular data. While recent Large Language Models (LLMs) show high performance on aggregate fidelity metrics, these scores provide only a coarse view of performance, obscuring specific logic-type reasoning failures and models' meta-logical awareness. We propose an operation-aware diagnostic framework that evaluates four core competencies: (1) Logical Form (LF) execution accuracy, (2) fidelity of LF-conditioned generation, (3) logic-type identification, and (4) LF-free generation. We apply this framework to a suite of frontier LLMs and perform fine-grained analysis across logic types such as aggregation, ordinal, and superlative reasoning. Our results show that LT2T fidelity assessment can be unstable; the choice of verifier and logic type can substantially alter conclusions and model rankings. Crucially, we identify a meta-logical gap: models often generate faithful statements while failing to identify the underlying operation.},
}
Markdown (Informal)
[Logic-Level Evaluation of Logical Table-to-Text Generation](https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.41/) (Trigg et al., CoNLL 2026)
ACL