@inproceedings{stiff-etal-2020-self,
title = "How Self-Attention Improves Rare Class Performance in a Question-Answering Dialogue Agent",
author = "Stiff, Adam and
Song, Qi and
Fosler-Lussier, Eric",
editor = "Pietquin, Olivier and
Muresan, Smaranda and
Chen, Vivian and
Kennington, Casey and
Vandyke, David and
Dethlefs, Nina and
Inoue, Koji and
Ekstedt, Erik and
Ultes, Stefan",
booktitle = "Proceedings of the 21th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = jul,
year = "2020",
address = "1st virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2020.sigdial-1.24/",
doi = "10.18653/v1/2020.sigdial-1.24",
pages = "196--202",
abstract = "Contextualized language modeling using deep Transformer networks has been applied to a variety of natural language processing tasks with remarkable success. However, we find that these models are not a panacea for a question-answering dialogue agent corpus task, which has hundreds of classes in a long-tailed frequency distribution, with only thousands of data points. Instead, we find substantial improvements in recall and accuracy on rare classes from a simple one-layer RNN with multi-headed self-attention and static word embeddings as inputs. While much research has used attention weights to illustrate what input is important for a task, the complexities of our dialogue corpus offer a unique opportunity to examine how the model represents what it attends to, and we offer a detailed analysis of how that contributes to improved performance on rare classes. A particularly interesting phenomenon we observe is that the model picks up implicit meanings by splitting different aspects of the semantics of a single word across multiple attention heads."
}
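
A minimal sketch (not the authors' released code) of the kind of model the abstract describes: frozen static word embeddings feeding a one-layer RNN whose outputs pass through multi-headed self-attention before classification over the long-tailed label set. The hidden size, head count, bidirectional GRU choice, and mean pooling are assumptions for illustration only.

```python
import torch
import torch.nn as nn

class RNNSelfAttentionClassifier(nn.Module):
    """One-layer RNN + multi-headed self-attention over static word embeddings."""

    def __init__(self, embedding_weights, num_classes, hidden_size=256, num_heads=4):
        super().__init__()
        # Static (frozen) pretrained word embeddings, e.g. GloVe vectors.
        self.embed = nn.Embedding.from_pretrained(embedding_weights, freeze=True)
        # Single-layer bidirectional GRU over the embedded tokens (assumption).
        self.rnn = nn.GRU(embedding_weights.size(1), hidden_size,
                          num_layers=1, batch_first=True, bidirectional=True)
        # Multi-headed self-attention applied to the RNN outputs.
        self.attn = nn.MultiheadAttention(2 * hidden_size, num_heads, batch_first=True)
        self.classifier = nn.Linear(2 * hidden_size, num_classes)

    def forward(self, token_ids, padding_mask=None):
        x = self.embed(token_ids)                   # (batch, seq, emb_dim)
        h, _ = self.rnn(x)                          # (batch, seq, 2*hidden)
        # Attention weights can be inspected per head, as in the paper's analysis.
        a, attn_weights = self.attn(h, h, h, key_padding_mask=padding_mask)
        pooled = a.mean(dim=1)                      # simple mean pooling (assumption)
        return self.classifier(pooled), attn_weights
```

Returning the per-head attention weights alongside the logits mirrors the paper's focus on examining how individual heads represent different aspects of a word's semantics.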