% Conference paper (cited as LREC 2026 in the Markdown citation below), so it
% is typed as inproceedings with the venue in booktitle rather than journal.
% The anthology volume id "main" is not a bound-volume number; it is kept in
% the ignored field anthology-volume so that styles do not print "volume main".
% Braces in the title protect proper nouns from sentence-casing styles.
@inproceedings{von-daniken-etal-2026-c4,
  title            = {{C4}: A Multilingual Benchmark for Retrieval-Augmented Generation Based on the {Catechism} of the {Catholic Church} and Its {Compendium}},
  author           = {von D{\"a}niken, Pius and
                      Cieliebak, Mark and
                      Deriu, Jan},
  editor           = {Piperidis, Stelios and
                      Bel, N{\'u}ria and
                      van den Heuvel, Henk and
                      Ide, Nancy and
                      Krek, Simon and
                      Toral, Antonio},
  booktitle        = {International Conference on Language Resources and Evaluation},
  anthology-volume = {main},
  month            = may,
  year             = {2026},
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.590/},
  pages            = {7446--7456},
  abstract         = {We introduce a new multilingual case study for evaluating retrieval augmented generation (RAG) systems, based on the Catechism of the Catholic Church and its Compendium. The Catechism is a structured document with numbered paragraphs, officially translated into many languages under strict editorial alignment. The Compendium reformulates this material into a question-answer format with explicit citations to the corresponding paragraphs. Together, they form a set of parallel monolingual corpora that share identical semantic structure, enabling direct, controlled comparison of RAG performance across languages. Beyond its theological origin, this text pair closely mirrors real-world applications of RAG in institutional contexts, such as querying internal policy documents with associated FAQ-style summaries, making it a practical testbed for multilingual retrieval and grounded answer generation. We release our data collection scripts and baseline results for further research.},
}
Markdown (Informal)
[C4: A Multilingual Benchmark for Retrieval-Augmented Generation Based on the Catechism of the Catholic Church and Its Compendium](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.590/) (von Däniken et al., LREC 2026)
ACL