@inproceedings{gao-etal-2025-i2r,
  title     = {{I}2{R}-{NLP} at {S}em{E}val-2025 Task 8: Question Answering on Tabular Data},
  author    = {Gao, Yuze and
               Chen, Bin and
               Su, Jian},
  editor    = {Rosenthal, Sara and
               Ros{\'a}, Aiala and
               Ghosh, Debanjan and
               Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.semeval-1.14/},
  pages     = {90--101},
  isbn      = {979-8-89176-273-2},
  abstract  = {We present a Large Language Model (LLM) based system for question answering (QA) over tabular data that leverages multi-turn prompting to automatically generate executable Pandas functions. Our framework decomposes the problem into three key steps: (1) Answer Type Identification, where the system identifies the expected format of the response (e.g., boolean, number, category); (2) Pandas Function Generation, which generates a corresponding Pandas function using table metadata and in-context examples, and (3) Error Correction and Regeneration, where iteratively refining the function based on error feedback from executions. Evaluations on the SemEval-2025 Task 8 Tabular QA benchmark (Grijalba et al., 2024) demonstrate that our multi-turn approach significantly outperforms single-turn prompting models in exact match accuracy by 7.3{\%}. The proposed system not only improves code generation robustness but also paves the way for enhanced and adaptability in table-QA reasoning tasks. Our implementation is available at https://github.com/Gyyz/Question{\_}Answering-over-Tabular-Data.},
}
@comment{
  Markdown (Informal):
  [I2R-NLP at SemEval-2025 Task 8: Question Answering on Tabular Data](https://aclanthology.org/2025.semeval-1.14/) (Gao et al., SemEval 2025)
  ACL
}