% System-description paper of the sonrobok4 team for SemEval-2025 Task 8 (DataBench).
% The url field uses the permanent ACL Anthology address rather than a staging-preview link.
@inproceedings{son-thin-2025-sonrobok4,
  title     = {{sonrobok4} Team at {SemEval-2025} Task 8: Question Answering over Tabular Data Using {Pandas} and Large Language Models},
  author    = {Son, Nguyen and
               Thin, Dang},
  editor    = {Rosenthal, Sara and
               Ros{\'a}, Aiala and
               Ghosh, Debanjan and
               Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation ({SemEval-2025})},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.semeval-1.51/},
  pages     = {357--362},
  isbn      = {979-8-89176-273-2},
  abstract  = {This paper describes the system of the son robok4 team for the SemEval-2025 Task 8: DataBench, Question-Answering over Tabular Data. The task requires answering questions based on the given question and dataset ID, ensuring that the responses are derived solely from the provided table. We address this task by using large language models (LLMs) to translate natural language questions into executable Python code for querying Pandas DataFrames. Furthermore, we employ techniques such as a rerun mechanism for error handling, structured metadata extraction, and dataset preprocessing to enhance performance. Our best-performing system achieved 89.46{\%} accuracy on Subtask 1 and placed in the top 4 on the private test set. Additionally, it achieved 85.25{\%} accuracy on Subtask 2 and placed in the top 9. We mainly focus on Subtask 1. We analyze the effectiveness of different LLMs for structured data reasoning and discuss key challenges in tabular question answering.},
}
Markdown (Informal)
[sonrobok4 Team at SemEval-2025 Task 8: Question Answering over Tabular Data Using Pandas and Large Language Models](https://aclanthology.org/2025.semeval-1.51/) (Son & Thin, SemEval 2025)
ACL