@inproceedings{zhang-etal-2023-content,
    title     = {Content- and Topology-Aware Representation Learning for Scientific Multi-Literature},
    author    = {Zhang, Kai and
                 Song, Kaisong and
                 Kang, Yangyang and
                 Liu, Xiaozhong},
    editor    = {Bouamor, Houda and
                 Pino, Juan and
                 Bali, Kalika},
    booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
    month     = dec,
    year      = {2023},
    address   = {Singapore},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.emnlp-main.465/},
    doi       = {10.18653/v1/2023.emnlp-main.465},
    pages     = {7490--7502},
    abstract  = {Representation learning forms an essential building block in the development of natural language processing architectures. To date, mainstream approaches focus on learning textual information at the sentence- or document-level, unfortunately, overlooking the inter-document connections. This omission decreases the potency of downstream applications, particularly in multi-document settings. To address this issue, embeddings equipped with latent semantic and rich relatedness information are needed. In this paper, we propose {SMRC}$^{2}$, which extends representation learning to the multi-document level. Our model jointly learns latent semantic information from content and rich relatedness information from topological networks. Unlike previous studies, our work takes multi-document as input and integrates both semantic and relatedness information using a shared space via language model and graph structure. Our extensive experiments confirm the superiority and effectiveness of our approach. To encourage further research in scientific multi-literature representation learning, we will release our code and a new dataset from the biomedical domain.},
}
Markdown (Informal)
[Content- and Topology-Aware Representation Learning for Scientific Multi-Literature](https://aclanthology.org/2023.emnlp-main.465/) (Zhang et al., EMNLP 2023)
ACL