@inproceedings{liu-etal-2026-sciciteval,
    title = "{S}ci{C}ite{V}al: A Multi-Domain Dataset for Scientific Citation Verification",
    author = "Liu, Qinyue and
      Zhou, Yongxin and
      Labbe, Cyril",
    editor = "Piperidis, Stelios and
      Bel, N{\'u}ria and
      van den Heuvel, Henk and
      Ide, Nancy and
      Krek, Simon and
      Toral, Antonio",
    booktitle = "International Conference on Language Resources and Evaluation",
    month = may,
    year = "2026",
    address = "Palma de Mallorca, Spain",
    publisher = "ELRA Language Resource Association",
    url = "https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.125/",
    pages = "1603--1611",
    abstract = "Citations are an integral and important part of scientific papers. However, there exist erroneous citations ranging from careless mistakes to deliberate misconduct, and there are currently few studies or benchmark datasets dedicated to automated citation verification. To bridge this gap, we introduce SciCiteVal, a novel, manually annotated dataset for citation verification. Each instance in SciCiteVal pairs a citation context from a citing paper with the corresponding evidence passage extracted from the full text of the cited source. The dataset features a comprehensive taxonomy, where each citation is annotated as ``Correct'', ``Incorrect'', or ``Unrelated'', with the ``Incorrect'' category further divided into five fine-grained sub-categories. The completed dataset comprises over 1,000 annotated citations, distributed as 302 ``Correct'', 302 ``Incorrect'', and 430 ``Unrelated'' instances. We establish a benchmark by evaluating different Large Language Models (LLMs), providing baseline performance and a detailed analysis. We release SciCiteVal as a resource to support the development of citation verification systems and to facilitate research on evidence-based tasks."
}
@comment{Markdown (Informal):
[SciCiteVal: A Multi-Domain Dataset for Scientific Citation Verification](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.125/) (Liu et al., LREC 2026)
ACL
}