@comment{
  Workshop paper from the ClimateNLP 2025 proceedings (ACL Anthology
  ID 2025.climatenlp-1.16, taken from the entry URL).
  Editor names are stored in "Last, First" form so that BibTeX splits
  them unambiguously.
  NOTE(review): the address field lists Bangkok, Thailand -- confirm the
  venue against the proceedings front matter before relying on it.
  NOTE(review): editor "Su, Alba (Ruiran)" carries a parenthesised
  alternate given name exactly as exported -- confirm preferred form.
}
@inproceedings{weichel-etal-2025-robust,
  title     = {Robust Table Information Extraction from Sustainability Reports: A Time-Aware Hybrid Two-Step Approach},
  author    = {Weichel, Hendrik and
               Simon, Martin and
               Sch{\"a}fer, J{\"o}rg},
  editor    = {Dutia, Kalyan and
               Henderson, Peter and
               Leippold, Markus and
               Manning, Christopher and
               Morio, Gaku and
               Muccione, Veruska and
               Ni, Jingwei and
               Schimanski, Tobias and
               Stammbach, Dominik and
               Singh, Alok and
               Su, Alba (Ruiran) and
               Vaghefi, Saeid A.},
  booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change ({ClimateNLP} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.climatenlp-1.16/},
  pages     = {233--244},
  isbn      = {979-8-89176-259-6},
  abstract  = {The extraction of emissions-related information from annual reports has become increasingly important due to the Corporate Sustainability Reporting Directive (CSRD), which mandates greater transparency in sustainability reporting. As a result, information extraction (IE) methods must be robust, ensuring accurate retrieval while minimizing false values. While large language models (LLMs) offer potential for this task, their black-box nature and lack of specialization in table structures limit their robustness {--} an essential requirement in risk-averse domains. In this work, we present a two-step hybrid approach which optimizes both accuracy and robustness. More precisely, we combine a rule-based step for table IE with a regularized LLM-based step, both leveraging temporal prior knowledge. Our tests demonstrate the advantages of combining structured rules with LLMs. Furthermore, the modular design of our method allows for flexible adaptation to various IE tasks, making it a practical solution for industry applications while also serving as a scalable assistive tool for information extraction.},
}
Markdown (Informal)
[Robust Table Information Extraction from Sustainability Reports: A Time-Aware Hybrid Two-Step Approach](https://aclanthology.org/2025.climatenlp-1.16/) (Weichel et al., ClimateNLP 2025)
ACL