% Workshop paper in the ClimateNLP 2025 proceedings (ACL Anthology ID 2025.climatenlp-1.10).
% `address` records the conference venue, following the ACL Anthology export convention.
@inproceedings{menon-serban-2025-automated,
  title     = {An Automated {LLM}-based Pipeline for Asset-Level Database Creation to Assess Deforestation Impact},
  author    = {Menon, Avanija and
               Serban, Ovidiu},
  editor    = {Dutia, Kalyan and
               Henderson, Peter and
               Leippold, Markus and
               Manning, Christopher and
               Morio, Gaku and
               Muccione, Veruska and
               Ni, Jingwei and
               Schimanski, Tobias and
               Stammbach, Dominik and
               Singh, Alok and
               Su, Alba (Ruiran) and
               A. Vaghefi, Saeid},
  booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.climatenlp-1.10/},
  pages     = {142--167},
  isbn      = {979-8-89176-259-6},
  abstract  = {The European Union Deforestation Regulation (EUDR) requires companies to prove their products do not contribute to deforestation, creating a critical demand for precise, asset-level environmental impact data. Current databases lack the necessary detail, relying heavily on broad financial metrics and manual data collection, which limits regulatory compliance and accurate environmental modeling. This study presents an automated, end-to-end data extraction pipeline that uses LLMs to create, clean, and validate structured databases, specifically targeting sectors with a high risk of deforestation. The pipeline introduces Instructional, Role-Based, Zero-Shot Chain-of-Thought (IRZ-CoT) prompting to enhance data extraction accuracy and a Retrieval-Augmented Validation (RAV) process that integrates real-time web searches for improved data reliability. Applied to SEC EDGAR filings in the Mining, Oil {\&} Gas, and Utilities sectors, the pipeline demonstrates significant improvements over traditional zero-shot prompting approaches, particularly in extraction accuracy and validation coverage. This work advances NLP-driven automation for regulatory compliance, CSR (Corporate Social Responsibility), and ESG, with broad sectoral applicability.}
}