@inproceedings{le-zuidema-2020-dolfin,
title = "{D}o{LFI}n: Distributions over Latent Features for Interpretability",
author = "Le, Phong and
Zuidema, Willem",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.127",
doi = "10.18653/v1/2020.coling-main.127",
pages = "1468--1474",
abstract = "Interpreting the inner workings of neural models is a key step in ensuring the robustness and trustworthiness of the models, but work on neural network interpretability typically faces a trade-off: either the models are too constrained to be very useful, or the solutions found by the models are too complex to interpret. We propose a novel strategy for achieving interpretability that {--} in our experiments {--} avoids this trade-off. Our approach builds on the success of using probability as the central quantity, such as for instance within the attention mechanism. In our architecture, DoLFIn (Distributions over Latent Features for Interpretability), we do not determine beforehand what each feature represents, and features go altogether into an unordered set. Each feature has an associated probability ranging from 0 to 1, weighing its importance for further processing. We show that, unlike attention and saliency map approaches, this set-up makes it straight-forward to compute the probability with which an input component supports the decision the neural model makes. To demonstrate the usefulness of the approach, we apply DoLFIn to text classification, and show that DoLFIn not only provides interpretable solutions, but even slightly outperforms the classical CNN and BiLSTM text classifiers on the SST2 and AG-news datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="le-zuidema-2020-dolfin">
<titleInfo>
<title>DoLFIn: Distributions over Latent Features for Interpretability</title>
</titleInfo>
<name type="personal">
<namePart type="given">Phong</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Willem</namePart>
<namePart type="family">Zuidema</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-dec</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Interpreting the inner workings of neural models is a key step in ensuring the robustness and trustworthiness of the models, but work on neural network interpretability typically faces a trade-off: either the models are too constrained to be very useful, or the solutions found by the models are too complex to interpret. We propose a novel strategy for achieving interpretability that – in our experiments – avoids this trade-off. Our approach builds on the success of using probability as the central quantity, such as for instance within the attention mechanism. In our architecture, DoLFIn (Distributions over Latent Features for Interpretability), we do not determine beforehand what each feature represents, and features go altogether into an unordered set. Each feature has an associated probability ranging from 0 to 1, weighing its importance for further processing. We show that, unlike attention and saliency map approaches, this set-up makes it straight-forward to compute the probability with which an input component supports the decision the neural model makes. To demonstrate the usefulness of the approach, we apply DoLFIn to text classification, and show that DoLFIn not only provides interpretable solutions, but even slightly outperforms the classical CNN and BiLSTM text classifiers on the SST2 and AG-news datasets.</abstract>
<identifier type="citekey">le-zuidema-2020-dolfin</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.127</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.127</url>
</location>
<part>
<date>2020-dec</date>
<extent unit="page">
<start>1468</start>
<end>1474</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DoLFIn: Distributions over Latent Features for Interpretability
%A Le, Phong
%A Zuidema, Willem
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 dec
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F le-zuidema-2020-dolfin
%X Interpreting the inner workings of neural models is a key step in ensuring the robustness and trustworthiness of the models, but work on neural network interpretability typically faces a trade-off: either the models are too constrained to be very useful, or the solutions found by the models are too complex to interpret. We propose a novel strategy for achieving interpretability that – in our experiments – avoids this trade-off. Our approach builds on the success of using probability as the central quantity, such as for instance within the attention mechanism. In our architecture, DoLFIn (Distributions over Latent Features for Interpretability), we do not determine beforehand what each feature represents, and features go altogether into an unordered set. Each feature has an associated probability ranging from 0 to 1, weighing its importance for further processing. We show that, unlike attention and saliency map approaches, this set-up makes it straight-forward to compute the probability with which an input component supports the decision the neural model makes. To demonstrate the usefulness of the approach, we apply DoLFIn to text classification, and show that DoLFIn not only provides interpretable solutions, but even slightly outperforms the classical CNN and BiLSTM text classifiers on the SST2 and AG-news datasets.
%R 10.18653/v1/2020.coling-main.127
%U https://aclanthology.org/2020.coling-main.127
%U https://doi.org/10.18653/v1/2020.coling-main.127
%P 1468-1474
Markdown (Informal)
[DoLFIn: Distributions over Latent Features for Interpretability](https://aclanthology.org/2020.coling-main.127) (Le & Zuidema, COLING 2020)
ACL