@inproceedings{xu-etal-2021-contrastive-document,
title = "Contrastive Document Representation Learning with Graph Attention Networks",
author = "Xu, Peng and
Chen, Xinchi and
Ma, Xiaofei and
Huang, Zhiheng and
Xiang, Bing",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2021.findings-emnlp.327/",
doi = "10.18653/v1/2021.findings-emnlp.327",
pages = "3874--3884",
abstract = "Recent progress in pretrained Transformer-based language models has shown great success in learning contextual representation of text. However, due to the quadratic self-attention complexity, most of the pretrained Transformers models can only handle relatively short text. It is still a challenge when it comes to modeling very long documents. In this work, we propose to use a graph attention network on top of the available pretrained Transformers model to learn document embeddings. This graph attention network allows us to leverage the high-level semantic structure of the document. In addition, based on our graph document model, we design a simple contrastive learning strategy to pretrain our models on a large amount of unlabeled corpus. Empirically, we demonstrate the effectiveness of our approaches in document classification and document retrieval tasks."
}
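
The abstract describes the approach at a high level: sentence-level embeddings from a pretrained Transformer are connected into a document graph, a graph attention network aggregates them into a document embedding, and the encoder is pretrained with a contrastive objective on unlabeled text. Below is a minimal, hypothetical PyTorch sketch of that kind of pipeline, not the authors' implementation: the single-head graph attention layer, the fully connected sentence graph, the mean pooling, the dropout-based views, and the InfoNCE temperature are all illustrative assumptions, and the random tensors stand in for real sentence embeddings from a frozen Transformer.

```python
# Minimal sketch (not the paper's code): a graph-attention document encoder over
# precomputed sentence embeddings, trained with an InfoNCE-style contrastive loss.
import torch
import torch.nn as nn
import torch.nn.functional as F


class GraphAttentionLayer(nn.Module):
    """Single-head graph attention (Velickovic et al., 2018) in plain PyTorch."""

    def __init__(self, in_dim: int, out_dim: int):
        super().__init__()
        self.proj = nn.Linear(in_dim, out_dim, bias=False)
        self.attn = nn.Linear(2 * out_dim, 1, bias=False)

    def forward(self, x: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        # x: (num_nodes, in_dim); adj: (num_nodes, num_nodes), 1 where an edge exists.
        h = self.proj(x)                                    # (N, out_dim)
        n = h.size(0)
        pairs = torch.cat(
            [h.unsqueeze(1).expand(n, n, -1), h.unsqueeze(0).expand(n, n, -1)], dim=-1
        )                                                   # (N, N, 2 * out_dim)
        scores = F.leaky_relu(self.attn(pairs).squeeze(-1), negative_slope=0.2)
        scores = scores.masked_fill(adj == 0, float("-inf"))
        alpha = torch.softmax(scores, dim=-1)               # attention over neighbors
        return F.elu(alpha @ h)


class GraphDocEncoder(nn.Module):
    """Document embedding = mean-pooled GAT output over sentence nodes."""

    def __init__(self, sent_dim: int = 768, hidden_dim: int = 256):
        super().__init__()
        self.gat = GraphAttentionLayer(sent_dim, hidden_dim)

    def forward(self, sent_emb: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        return self.gat(sent_emb, adj).mean(dim=0)          # (hidden_dim,)


def info_nce(z1: torch.Tensor, z2: torch.Tensor, temperature: float = 0.1) -> torch.Tensor:
    """Contrastive loss: matching rows of z1/z2 are positives, other rows negatives."""
    z1, z2 = F.normalize(z1, dim=-1), F.normalize(z2, dim=-1)
    logits = z1 @ z2.t() / temperature                      # (B, B) similarity matrix
    targets = torch.arange(z1.size(0))
    return F.cross_entropy(logits, targets)


if __name__ == "__main__":
    torch.manual_seed(0)
    encoder = GraphDocEncoder()
    # Stand-in for sentence embeddings from a frozen pretrained Transformer:
    # a batch of 4 documents, each with 10 sentence vectors of size 768.
    docs = [torch.randn(10, 768) for _ in range(4)]
    adj = torch.ones(10, 10)                                # fully connected sentence graph
    # Two "views" per document (here: sentence-embedding dropout) for contrastive pretraining.
    view1 = torch.stack([encoder(F.dropout(d, 0.1), adj) for d in docs])
    view2 = torch.stack([encoder(F.dropout(d, 0.1), adj) for d in docs])
    loss = info_nce(view1, view2)
    loss.backward()
    print(f"contrastive loss: {loss.item():.4f}")
```

In practice the document graph construction, the augmentation used to form the two views, and the pooling would follow the paper's actual design rather than these placeholders.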
Markdown (Informal)
[Contrastive Document Representation Learning with Graph Attention Networks](https://aclanthology.org/2021.findings-emnlp.327/) (Xu et al., Findings 2021)