@inproceedings{dong-etal-2025-tablecoder,
    title = {{TableCoder}: Table Extraction from Text via Reliable Code Generation},
    author = {Dong, Haoyu and
      Hu, Yue and
      Peng, Huailiang and
      Cao, Yanan},
    editor = {Rehm, Georg and
      Li, Yunyao},
    booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track)},
    month = jul,
    year = {2025},
    address = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.acl-industry.98/},
    doi = {10.18653/v1/2025.acl-industry.98},
    pages = {1399--1412},
    isbn = {979-8-89176-288-6},
    abstract = {This paper introduces a task aimed at extracting structured tables from text using natural language (NL) instructions. We present TableCoder, an approach that leverages the symbolic nature of code to enhance the robustness of table structure construction and content extraction. TableCoder first generates Python classes or SQL statements to explicitly construct table structures, capturing semantic ontology, computational dependencies, numerical properties, and format strings. This approach reliably mitigates issues such as structural errors, erroneous computations, and mismatched value types. Subsequently, TableCoder proposes grounded content extraction, populating table cells sequentially and maintaining the exact order in which they are mentioned in the source text. By simulating a grounded ``translation'' from text to code, this method reduces the likelihood of omissions and hallucinations. Experimental results demonstrate that TableCoder significantly improves F1 scores and mitigates hallucination and computational errors, crucial for high-stakes applications like government data analytics and financial compliance reporting. Moreover, the code-generation-based method naturally integrates with standard SQL databases and Python workflows, ensuring seamless deployment in existing enterprise data pipelines.},
}
Markdown (Informal)
[TableCoder: Table Extraction from Text via Reliable Code Generation](https://aclanthology.org/2025.acl-industry.98/) (Dong et al., ACL 2025)
ACL