@inproceedings{guo-etal-2025-candy,
title = "{CANDY}: Benchmarking {LLM}s' Limitations and Assistive Potential in {C}hinese Misinformation Fact-Checking",
author = "Guo, Ruiling and
Yang, Xinwei and
Huang, Chen and
Zhang, Tong and
Hu, Yong",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.307/",
doi = "10.18653/v1/2025.findings-emnlp.307",
pages = "5724--5758",
ISBN = "979-8-89176-335-7",
abstract = "The effectiveness of large language models (LLMs) to fact-check misinformation remains uncertain, despite their growing use. To this end, we present CANDY, a benchmark designed to systematically evaluate the capabilities and limitations of LLMs in fact-checking Chinese misinformation. Specifically, we curate a carefully annotated dataset of {\textasciitilde}20k instances. Our analysis shows that current LLMs exhibit limitations in generating accurate fact-checking conclusions, even when enhanced with chain-of-thought reasoning and few-shot prompting. To understand these limitations, we develop a taxonomy to categorize flawed LLM-generated explanations for their conclusions and identify factual fabrication as the most common failure mode. Although LLMs alone are unreliable for fact-checking, our findings indicate their considerable potential to augment human performance when deployed as assistive tools in scenarios. Our dataset and code can be accessed at \url{https://github.com/SCUNLP/CANDY}."
}
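The abstract notes that the LLMs were evaluated with chain-of-thought reasoning and few-shot prompting. Below is a minimal, hypothetical sketch of how such a fact-checking prompt might be assembled; it is not the authors' released code (their official dataset and evaluation scripts are at the GitHub URL above), and the example claim, verdict labels, and function names are illustrative assumptions.

```python
# Hypothetical sketch of few-shot chain-of-thought prompting for
# misinformation fact-checking, as described in the CANDY abstract.
# Not the authors' code; labels and examples below are placeholders.

FEW_SHOT_EXAMPLES = [
    {
        # Hypothetical in-context demonstration (claim, reasoning, verdict).
        "claim": "Drinking hot water cures influenza.",
        "reasoning": "Influenza is a viral infection; no clinical evidence "
                     "supports hot water as a cure.",
        "verdict": "False",
    },
]

def build_prompt(claim: str) -> str:
    """Assemble a few-shot chain-of-thought fact-checking prompt for one claim."""
    parts = [
        "You are a fact-checker. For each claim, reason step by step, "
        "then give a verdict of True or False."
    ]
    # Prepend the in-context demonstrations, each with explicit reasoning.
    for ex in FEW_SHOT_EXAMPLES:
        parts.append(f"Claim: {ex['claim']}\n"
                     f"Reasoning: {ex['reasoning']}\n"
                     f"Verdict: {ex['verdict']}")
    # The target claim ends with "Reasoning:" so the model reasons before judging.
    parts.append(f"Claim: {claim}\nReasoning:")
    return "\n\n".join(parts)

if __name__ == "__main__":
    print(build_prompt("Example claim to verify."))
```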