@comment{ACL 2019 conference paper by Vu and Iyyer. The url field holds the canonical ACL Anthology address (paper ID P19-1638, matching the DOI suffix) rather than a preview-branch build.}
@inproceedings{vu-iyyer-2019-encouraging,
  title     = {Encouraging Paragraph Embeddings to Remember Sentence Identity Improves Classification},
  author    = {Vu, Tu and Iyyer, Mohit},
  editor    = {Korhonen, Anna and Traum, David and M{\`a}rquez, Llu{\'i}s},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P19-1638/},
  doi       = {10.18653/v1/P19-1638},
  pages     = {6331--6338},
  abstract  = {While paragraph embedding models are remarkably effective for downstream classification tasks, what they learn and encode into a single vector remains opaque. In this paper, we investigate a state-of-the-art paragraph embedding method proposed by Zhang et al. (2017) and discover that it cannot reliably tell whether a given sentence occurs in the input paragraph or not. We formulate a sentence content task to probe for this basic linguistic property and find that even a much simpler bag-of-words method has no trouble solving it. This result motivates us to replace the reconstruction-based objective of Zhang et al. (2017) with our sentence content probe objective in a semi-supervised setting. Despite its simplicity, our objective improves over paragraph reconstruction in terms of (1) downstream classification accuracies on benchmark datasets, (2) faster training, and (3) better generalization ability.},
}
Markdown (Informal)
[Encouraging Paragraph Embeddings to Remember Sentence Identity Improves Classification](https://aclanthology.org/P19-1638/) (Vu & Iyyer, ACL 2019)
ACL