@inproceedings{bastings-etal-2019-interpretable,
title = "Interpretable Neural Predictions with Differentiable Binary Variables",
author = "Bastings, Jasmijn and
Aziz, Wilker and
Titov, Ivan",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/P19-1284/",
doi = "10.18653/v1/P19-1284",
pages = "2963--2977",
abstract = "The success of neural networks comes hand in hand with a desire for more interpretability. We focus on text classifiers and make them more interpretable by having them provide a justification{--}a rationale{--}for their predictions. We approach this problem by jointly training two neural network models: a latent model that selects a rationale (i.e. a short and informative part of the input text), and a classifier that learns from the words in the rationale alone. Previous work proposed to assign binary latent masks to input positions and to promote short selections via sparsity-inducing penalties such as L0 regularisation. We propose a latent model that mixes discrete and continuous behaviour allowing at the same time for binary selections and gradient-based training without REINFORCE. In our formulation, we can tractably compute the expected value of penalties such as L0, which allows us to directly optimise the model towards a pre-specified text selection rate. We show that our approach is competitive with previous work on rationale extraction, and explore further uses in attention mechanisms."
}
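The abstract's "latent model that mixes discrete and continuous behaviour" refers to a stretched-and-rectified Kumaraswamy gate (the paper's "HardKuma"): samples land exactly on 0 or 1 with non-zero probability, yet the sampler stays differentiable in its shape parameters, and P(z = 0) has a closed form, making the expected L0 tractable. Below is a minimal sketch, not the authors' implementation; the parameter names `a`, `b` and the stretch bounds `l = -0.1`, `r = 1.1` are illustrative assumptions.

```python
import torch

def hardkuma_sample(a, b, l=-0.1, r=1.1, eps=1e-6):
    """Reparameterised sample: Kuma(a, b) on (0,1) -> stretch to (l, r) -> rectify."""
    u = torch.rand_like(a).clamp(eps, 1 - eps)
    # Inverse CDF of Kumaraswamy(a, b): x = (1 - (1 - u)^(1/b))^(1/a)
    x = (1.0 - (1.0 - u) ** (1.0 / b)) ** (1.0 / a)
    t = l + (r - l) * x          # stretched support (l, r), with l < 0 < 1 < r
    return t.clamp(0.0, 1.0)     # rectification puts point masses at exactly 0 and 1

def prob_nonzero(a, b, l=-0.1, r=1.1):
    """P(z != 0) in closed form, so E[L0] = sum_i P(z_i != 0) is tractable."""
    x0 = (0.0 - l) / (r - l)             # pre-image of 0 under the stretch
    cdf0 = 1.0 - (1.0 - x0 ** a) ** b    # Kumaraswamy CDF at x0 = P(z == 0)
    return 1.0 - cdf0

# Toy usage: one gate per input position; in the model, a and b would come
# from the latent network conditioned on the sentence (hypothetical setup).
a = torch.nn.functional.softplus(torch.randn(6, requires_grad=True))
b = torch.full_like(a, 1.0)
z = hardkuma_sample(a, b)                 # near-binary mask over 6 positions
expected_l0 = prob_nonzero(a, b).sum()    # differentiable selection-size penalty
```

In the paper this expected L0 feeds a constrained objective that pushes the model toward a pre-specified text selection rate; the sketch above only shows the distributional building block.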