@inproceedings{saggau-etal-2023-efficient,
  title     = {Efficient Document Embeddings via Self-Contrastive {Bregman} Divergence Learning},
  author    = {Saggau, Daniel and
               Rezaei, Mina and
               Bischl, Bernd and
               Chalkidis, Ilias},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.findings-acl.771/},
  doi       = {10.18653/v1/2023.findings-acl.771},
  pages     = {12181--12190},
  abstract  = {Learning quality document embeddings is a fundamental problem in natural language processing (NLP), information retrieval (IR), recommendation systems, and search engines. Despite recent advances in the development of transformer-based models that produce sentence embeddings with self-contrastive learning, the encoding of long documents (Ks of words) is still challenging with respect to both efficiency and quality considerations. Therefore, we train Longfomer-based document encoders using a state-of-the-art unsupervised contrastive learning method (SimCSE). Further on, we complement the baseline method -siamese neural network- with additional convex neural networks based on functional Bregman divergence aiming to enhance the quality of the output document representations. We show that overall the combination of a self-contrastive siamese network and our proposed neural Bregman network outperforms the baselines in two linear classification settings on three long document topic classification tasks from the legal and biomedical domains.},
}
Markdown (Informal)
[Efficient Document Embeddings via Self-Contrastive Bregman Divergence Learning](https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.findings-acl.771/) (Saggau et al., Findings 2023)
ACL