% Sun, Atanasova and Augenstein: NAACL 2025 long paper
% (ACL Anthology ID 2025.naacl-long.530, taken from the landing-page URL).
% The url field uses the canonical aclanthology.org landing page instead of
% the temporary preview.aclanthology.org staging build.
% NOTE(review): the exported record carries no doi field; ACL Anthology DOIs
% usually follow 10.18653/v1/<anthology-id> -- confirm before adding one.
@inproceedings{sun-etal-2025-evaluating,
  title     = {Evaluating Input Feature Explanations through a Unified Diagnostic Evaluation Framework},
  author    = {Sun, Jingyi and
               Atanasova, Pepa and
               Augenstein, Isabelle},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-long.530/},
  pages     = {10559--10577},
  isbn      = {979-8-89176-189-6},
  abstract  = {Explaining the decision-making process of machine learning models is crucial for ensuring their reliability and transparency for end users. One popular explanation form highlights key input features, such as i) tokens (e.g., Shapley Values and Integrated Gradients), ii) interactions between tokens (e.g., Bivariate Shapley and Attention-based methods), or iii) interactions between spans of the input (e.g., Louvain Span Interactions). However, these explanation types have only been studied in isolation, making it difficult to judge their respective applicability. To bridge this gap, we develop a unified framework that facilitates an automated and direct comparison between highlight and interactive explanations comprised of four diagnostic properties. We conduct an extensive analysis across these three types of input feature explanations {--} each utilizing three different explanation techniques{--}across two datasets and two models, and reveal that each explanation has distinct strengths across the different diagnostic properties. Nevertheless, interactive span explanations outperform other types of input feature explanations across most diagnostic properties. Despite being relatively understudied, our analysis underscores the need for further research to improve methods generating these explanation types. Additionally, integrating them with other explanation types that perform better in certain characteristics could further enhance their overall effectiveness.},
}
Markdown (Informal)
[Evaluating Input Feature Explanations through a Unified Diagnostic Evaluation Framework](https://aclanthology.org/2025.naacl-long.530/) (Sun et al., NAACL 2025)
ACL