% HEAVEN hybrid-vector retrieval paper (Findings of ACL 2026); record taken from the ACL Anthology.
@inproceedings{kim-etal-2026-hybrid,
  title     = {Hybrid-Vector Retrieval for Visually Rich Documents: Combining Single-Vector Efficiency and Multi-Vector Accuracy},
  author    = {Kim, Juyeon and
               Lee, Geon and
               Choi, Dongwon and
               Kim, Taeuk and
               Shin, Kijung},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.54/},
  pages     = {1073--1089},
  isbn      = {979-8-89176-395-1},
  abstract  = {Retrieval over visually rich documents is essential for tasks such as legal discovery, scientific search, and enterprise knowledge management. Existing approaches fall into two paradigms: single-vector retrieval, which is efficient but coarse, and multi-vector retrieval, which is accurate but computationally expensive. To address this trade-off, we propose HEAVEN, a plug-and-play two-stage hybrid-vector framework. In the first stage, HEAVEN efficiently retrieves candidate pages using a single-vector method over Visually-Summarized Pages (VS-Pages), which assemble representative visual layouts from multiple pages. In the second stage, it reranks candidates with a multi-vector method while filtering query tokens by linguistic importance to reduce redundant computations. To evaluate retrieval systems under realistic conditions, we also introduce ViMDoc, a benchmark for visually rich, multi-document, and long-document retrieval. Across four benchmarks, HEAVEN attains 99.87{\%} of the Recall@1 performance of multi-vector models on average while reducing per-query computation by 99.82{\%}, achieving efficiency and accuracy. Our code and datasets are available at: https://github.com/juyeonnn/HEAVEN},
}
Markdown (Informal)
[Hybrid-Vector Retrieval for Visually Rich Documents: Combining Single-Vector Efficiency and Multi-Vector Accuracy](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.54/) (Kim et al., Findings 2026)
ACL