@inproceedings{cao-wang-2021-attention,
title = "Attention Head Masking for Inference Time Content Selection in Abstractive Summarization",
author = "Cao, Shuyang and
Wang, Lu",
editor = "Toutanova, Kristina and
Rumshisky, Anna and
Zettlemoyer, Luke and
Hakkani-Tur, Dilek and
Beltagy, Iz and
Bethard, Steven and
Cotterell, Ryan and
Chakraborty, Tanmoy and
Zhou, Yichao",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.naacl-main.397/",
doi = "10.18653/v1/2021.naacl-main.397",
pages = "5008--5016",
abstract = "How can we effectively inform content selection in Transformer-based abstractive summarization models? In this work, we present a simple-yet-effective attention head masking technique, which is applied on encoder-decoder attentions to pinpoint salient content at inference time. Using attention head masking, we are able to reveal the relation between encoder-decoder attentions and content selection behaviors of summarization models. We then demonstrate its effectiveness on three document summarization datasets based on both in-domain and cross-domain settings. Importantly, our models outperform prior state-of-the-art models on CNN/Daily Mail and New York Times datasets. Moreover, our inference-time masking technique is also data-efficient, requiring only 20{\%} of the training samples to outperform BART fine-tuned on the full CNN/DailyMail dataset."
}
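The abstract describes masking encoder-decoder (cross) attention heads at inference time so that selected heads attend only to source tokens chosen as salient. The snippet below is a minimal illustrative sketch of that core idea in plain PyTorch, not the authors' implementation; the function and variable names (masked_cross_attention, salient_mask, head_mask) are assumptions for illustration only.

```python
# Minimal sketch (assumed names, not the paper's code): for chosen
# encoder-decoder attention heads, restrict attention to a set of
# "salient" source tokens by masking the attention logits before softmax.

import torch
import torch.nn.functional as F

def masked_cross_attention(scores, salient_mask, head_mask):
    """Apply inference-time head masking to cross-attention logits.

    scores:       (batch, num_heads, tgt_len, src_len) raw attention logits
    salient_mask: (batch, src_len) bool, True for source tokens kept as salient
    head_mask:    (num_heads,) bool, True for heads whose attention is constrained
    """
    # Broadcast the token-level mask to (batch, 1, 1, src_len).
    token_mask = salient_mask[:, None, None, :]
    # Only constrained heads are affected; other heads keep full attention.
    head_sel = head_mask[None, :, None, None]
    blocked = head_sel & ~token_mask                  # positions to suppress
    masked_scores = scores.masked_fill(blocked, float("-inf"))
    return F.softmax(masked_scores, dim=-1)           # renormalized attention

# Toy usage: 1 example, 4 heads, 2 decoder steps, 6 source tokens.
scores = torch.randn(1, 4, 2, 6)
salient_mask = torch.tensor([[True, True, False, False, True, False]])
head_mask = torch.tensor([True, True, False, False])  # constrain first two heads
attn = masked_cross_attention(scores, salient_mask, head_mask)
print(attn[0, 0, 0])  # constrained head: zero weight on non-salient tokens
print(attn[0, 3, 0])  # unconstrained head: full attention distribution
```

In the paper this masking is applied only at inference time, so the same fine-tuned summarization model can be steered toward different content selections without retraining.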