@inproceedings{zhou-etal-2025-ritt,
  title     = {{RITT}: A Retrieval-Assisted Framework with Image and Text Table Representations for Table Question Answering},
  author    = {Zhou, Wei and
               Mesgar, Mohsen and
               Adel, Heike and
               Friedrich, Annemarie},
  editor    = {Chang, Shuaichen and
               Hulsebos, Madelon and
               Liu, Qian and
               Chen, Wenhu and
               Sun, Huan},
  booktitle = {Proceedings of the 4th Table Representation Learning Workshop},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.trl-workshop.8/},
  pages     = {86--97},
  isbn      = {979-8-89176-268-8},
  abstract  = {Tables can be represented either as text or as images. Previous works on table question answering (TQA) typically rely on only one representation, neglecting the potential benefits of combining both. In this work, we explore integrating textual and visual table representations using multi-modal large language models (MLLMs) for TQA. Specifically, we propose RITT, a retrieval-assisted framework that first identifies the most relevant part of a table for a given question, then dynamically selects the optimal table representations based on the question type. Experiments demonstrate that our framework significantly outperforms the baseline MLLMs by an average of 13 Exact Match and surpasses two text-only state-of-the-art TQA methods on four TQA benchmarks, highlighting the benefits of leveraging both textual and visual table representations.},
}
Markdown (Informal)
[RITT: A Retrieval-Assisted Framework with Image and Text Table Representations for Table Question Answering](https://aclanthology.org/2025.trl-workshop.8/) (Zhou et al., TRL 2025)
ACL