% Entry for the KoViDoRe paper in the MAGMaR 2026 workshop proceedings.
% NOTE(review): the `url` field points at a preview/ingest host
% (preview.aclanthology.org/ingest-acl-workshops), presumably a staging
% address -- confirm the canonical URL before relying on it.
% Case-sensitive names are protected as whole words ({KoViDoRe}, {Korean},
% {MAGMaR}) so sentence-casing styles keep them intact.
@inproceedings{choi-etal-2026-kovidore,
  title     = {{KoViDoRe}: A Benchmark for {Korean} Visual Document Retrieval},
  author    = {Choi, Yongbin and
               Song, Yongwoo and
               Sung, Mujeen},
  editor    = {Murray, Kenton and
               Kriz, Reno},
  booktitle = {Proceedings of the 2nd Workshop on Multimodal Augmented Generation via Multimodal Retrieval ({MAGMaR} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.magmar-main.11/},
  pages     = {54--80},
  isbn      = {979-8-89176-425-5},
  abstract  = {Recent advances in multimodal retrieval have improved the ability to retrieve information from visually rich documents such as PDFs and reports. However, existing benchmarks remain largely centered on English and provide limited coverage of Korean visual documents with complex structures. Furthermore, most existing Korean resources primarily evaluate single-page retrieval, failing to capture realistic scenarios that require evidence aggregation across multiple pages. To address these gaps, we introduce KoViDoRe, a benchmark for Korean visual document retrieval. The dataset is constructed from publicly available Korean documents with diverse layouts, including tables, figures, and multi-column structures. We develop a multi-stage data curation pipeline consisting of structured document parsing, synthetic query generation using both summary-based and context-based strategies, and relevance mapping with human verification. Using KoViDoRe, we evaluate a wide range of multimodal retrieval models and observe that current models struggle to effectively handle Korean visual document retrieval, particularly in settings involving structured content and diverse query types. Motivated by this finding, we further curate a large-scale training dataset, Ko-VDR Train Public, to support the development of retrieval models tailored to Korean visual documents. Together, KoViDoRe and Ko-VDR Train Public provide a unified benchmark and training resource for Korean visual document retrieval.},
}
Markdown (Informal)
[KoViDoRe: A Benchmark for Korean Visual Document Retrieval](https://preview.aclanthology.org/ingest-acl-workshops/2026.magmar-main.11/) (Choi et al., MAGMaR 2026)
ACL
- Yongbin Choi, Yongwoo Song, and Mujeen Sung. 2026. KoViDoRe: A Benchmark for Korean Visual Document Retrieval. In Proceedings of the 2nd Workshop on Multimodal Augmented Generation via Multimodal Retrieval (MAGMaR 2026), pages 54–80, San Diego, USA. Association for Computational Linguistics.