@inproceedings{jiang-bansal-2018-closed,
title = "Closed-Book Training to Improve Summarization Encoder Memory",
author = "Jiang, Yichen and
Bansal, Mohit",
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/D18-1440/",
doi = "10.18653/v1/D18-1440",
pages = "4067--4077",
    abstract = "A good neural sequence-to-sequence summarization model should have a strong encoder that can distill and memorize the important information from long input texts so that the decoder can generate salient summaries based on the encoder's memory. In this paper, we aim to improve the memorization capabilities of the encoder of a pointer-generator model by adding an additional `closed-book' decoder without attention and pointer mechanisms. Such a decoder forces the encoder to be more selective in the information encoded in its memory state because the decoder can't rely on the extra information provided by the attention and possibly copy modules, and hence improves the entire model. On the CNN/Daily Mail dataset, our 2-decoder model outperforms the baseline significantly in terms of ROUGE and METEOR metrics, for both cross-entropy and reinforced setups (and on human evaluation). Moreover, our model also achieves higher scores in a test-only DUC-2002 generalizability setup. We further present a memory ability test, two saliency metrics, as well as several sanity-check ablations (based on fixed-encoder, gradient-flow cut, and model capacity) to prove that the encoder of our 2-decoder model does in fact learn stronger memory representations than the baseline encoder."
}