@inproceedings{sushil-etal-2018-rule,
    title = "Rule induction for global explanation of trained models",
    author = "Sushil, Madhumita and
      {\v{S}}uster, Simon and
      Daelemans, Walter",
    editor = "Linzen, Tal and
      Chrupa{\l}a, Grzegorz and
      Alishahi, Afra",
    booktitle = "Proceedings of the 2018 {EMNLP} Workshop {B}lackbox{NLP}: Analyzing and Interpreting Neural Networks for {NLP}",
    month = nov,
    year = "2018",
    address = "Brussels, Belgium",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W18-5411/",
    doi = "10.18653/v1/W18-5411",
    pages = "82--97",
    abstract = "Understanding the behavior of a trained network and finding explanations for its outputs is important for improving the network's performance and generalization ability, and for ensuring trust in automated systems. Several approaches have previously been proposed to identify and visualize the most important features by analyzing a trained network. However, the relations between different features and classes are lost in most cases. We propose a technique to induce sets of if-then-else rules that capture these relations to globally explain the predictions of a network. We first calculate the importance of the features in the trained network. We then weigh the original inputs with these feature importance scores, simplify the transformed input space, and finally fit a rule induction model to explain the model predictions. We find that the output rule-sets can explain the predictions of a neural network trained for 4-class text classification from the 20 newsgroups dataset to a macro-averaged F-score of 0.80. We make the code available at \url{https://github.com/clips/interpret_with_rules}."
}
Markdown (Informal)
[Rule induction for global explanation of trained models](https://aclanthology.org/W18-5411/) (Sushil et al., EMNLP 2018)
ACL
- Madhumita Sushil, Simon Šuster, and Walter Daelemans. 2018. Rule induction for global explanation of trained models. In Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP, pages 82–97, Brussels, Belgium. Association for Computational Linguistics.