@comment{NOTE(review): the url field of akhtar-etal-2026-ev2r points at a preview.aclanthology.org ingestion build, which looks like a staging address; confirm the permanent URL before relying on it. The doi field is the stable identifier.}
@article{akhtar-etal-2026-ev2r,
  author    = {Akhtar, Mubashara and
               Schlichtkrull, Michael and
               Vlachos, Andreas},
  title     = {{Ev2R}: Evaluating Evidence Retrieval in Automated Fact-Checking},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {14},
  pages     = {530--561},
  year      = {2026},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl.a.647},
  url       = {https://preview.aclanthology.org/ingest-latest-mitpress-cl-tacl/2026.tacl-1.25/},
  abstract  = {Current automated fact-checking (AFC) approaches typically evaluate evidence either implicitly via the predicted verdicts or through exact matches with predefined closed knowledge sources, such as Wikipedia. However, these methods are limited due to their reliance on evaluation metrics originally designed for other purposes and constraints from closed knowledge sources. In this work, we introduce Ev2R which combines the strengths of reference-based evaluation and verdict-level proxy scoring. Ev2R jointly assesses how well the evidence aligns with the gold references and how reliably it supports the verdict, addressing the shortcomings of prior methods. We evaluate Ev2R against three types of evidence evaluation approaches: reference-based, proxy-reference, and reference-less baselines. Assessments against human ratings and adversarial tests demonstrate that Ev2R consistently outperforms existing scoring approaches in accuracy and robustness. It achieves stronger correlation with human judgments and greater robustness to adversarial perturbations, establishing it as a reliable metric for evidence evaluation in AFC.},
}
Markdown (Informal)
[Ev2R: Evaluating Evidence Retrieval in Automated Fact-Checking](https://preview.aclanthology.org/ingest-latest-mitpress-cl-tacl/2026.tacl-1.25/) (Akhtar et al., TACL 2026)
ACL