% FENICE paper, Findings of ACL 2024. The url field holds the permanent
% ACL Anthology address (not a temporary preview-branch link); the doi is the
% preferred stable identifier.
@inproceedings{scire-etal-2024-fenice,
  title     = {{FENICE}: Factuality Evaluation of summarization based on Natural language Inference and Claim Extraction},
  author    = {Scir{\`e}, Alessandro and
               Ghonim, Karim and
               Navigli, Roberto},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.841/},
  doi       = {10.18653/v1/2024.findings-acl.841},
  pages     = {14148--14161},
  abstract  = {Recent advancements in text summarization, particularly with the advent of Large Language Models (LLMs), have shown remarkable performance. However, a notable challenge persists as a substantial number of automatically-generated summaries exhibit factual inconsistencies, such as hallucinations. In response to this issue, various approaches for the evaluation of consistency for summarization have emerged. Yet, these newly-introduced metrics face several limitations, including lack of interpretability, focus on short document summaries (e.g., news articles), and computational impracticality, especially for LLM-based metrics. To address these shortcomings, we propose Factuality Evaluation of summarization based on Natural language Inference and Claim Extraction (FENICE), a more interpretable and efficient factuality-oriented metric. FENICE leverages an NLI-based alignment between information in the source document and a set of atomic facts, referred to as \textit{claims}, extracted from the summary. Our metric sets a new state of the art on AGGREFACT, the de-facto benchmark for factuality evaluation. Moreover, we extend our evaluation to a more challenging setting by conducting a human annotation process of long-form summarization. In the hope of fostering research in summarization factuality evaluation, we release the code of our metric and our factuality annotations of long-form summarization at \url{https://github.com/Babelscape/FENICE}.},
}
Markdown (Informal)
[FENICE: Factuality Evaluation of summarization based on Natural language Inference and Claim Extraction](https://aclanthology.org/2024.findings-acl.841/) (Scirè et al., Findings 2024)
ACL