@inproceedings{jung-etal-2026-villain,
  title     = {{VILLAIN} at {AVerImaTeC}: Verifying Image--Text Claims via Multi-Agent Collaboration},
  author    = {Jung, Jaeyoon and
               Yoon, Yejun and
               Yoon, Seunghyun and
               Park, Kunwoo},
  editor    = {Akhtar, Mubashara and
               Aly, Rami and
               Cao, Rui and
               Christodoulopoulos, Christos and
               Cocarascu, Oana and
               Guo, Zhijiang and
               Mittal, Arpit and
               Schlichtkrull, Michael and
               Thorne, James and
               Vlachos, Andreas},
  booktitle = {Proceedings of the Ninth Fact Extraction and {VERification} Workshop ({FEVER})},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ccl/2026.fever-1.9/},
  pages     = {114--126},
  isbn      = {979-8-89176-365-4},
  abstract  = {This paper describes VILLAIN, a multimodal fact-checking system that verifies image-text claims through prompt-based multi-agent collaboration. For the AVerImaTeC shared task, VILLAIN employs vision-language model agents across multiple stages of fact-checking. Textual and visual evidence is retrieved from the knowledge store enriched through additional web collection. To identify key information and address inconsistencies among evidence items, modality-specific and cross-modal agents generate analysis reports. In the subsequent stage, question-answer pairs are produced based on these reports. Finally, the Verdict Prediction agent produces the verification outcome based on the image-text claim and the generated question-answer pairs. Our system ranked first on the leaderboard across all evaluation metrics. The source code is publicly available at https://github.com/ssu-humane/VILLAIN.},
}
@comment{Informal Markdown citation from the ACL Anthology page:
[VILLAIN at AVerImaTeC: Verifying Image–Text Claims via Multi-Agent Collaboration](https://preview.aclanthology.org/ingest-ccl/2026.fever-1.9/) (Jung et al., FEVER 2026)
ACL}