@inproceedings{xie-lukasiewicz-2025-investigating,
title = "Investigating Multi-layer Representations for Dense Passage Retrieval",
author = "Xie, Zhongbin and
Lukasiewicz, Thomas",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/name-variant-enfa-fane/2025.findings-emnlp.1333/",
doi = "10.18653/v1/2025.findings-emnlp.1333",
pages = "24522--24536",
ISBN = "979-8-89176-335-7",
abstract = "Dense retrieval models usually adopt vectors from the last hidden layer of the document encoder to represent a document, which is in contrast to the fact that representations in different layers of a pre-trained language model usually contain different kinds of linguistic knowledge, and behave differently during fine-tuning. Therefore, we propose to investigate utilizing representations from multiple encoder layers to make up the representation of a document, which we denote Multi-layer Representations (MLR). We first investigate how representations in different layers affect MLR{'}s performance under the multi-vector retrieval setting, and then propose to leverage pooling strategies to reduce multi-vector models to single-vector ones to improve retrieval efficiency. Experiments demonstrate the effectiveness of MLR over dual encoder, ME-BERT and ColBERT in the single-vector retrieval setting, as well as demonstrate that it works well with other advanced training techniques such as retrieval-oriented pre-training and hard negative mining."
}
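
A minimal sketch (not the authors' code) of the idea described in the abstract: build a document representation from vectors taken at several encoder layers (multi-vector), then pool it down to a single vector for efficient retrieval. The layer indices, pooling strategies, and function names below are illustrative assumptions, not the paper's exact configuration.

```python
import torch

def multi_layer_representation(hidden_states, layers=(4, 8, 12)):
    """hidden_states: sequence of [batch, seq_len, dim] tensors, one per encoder layer.
    Returns the [CLS] vector of each selected layer -> [batch, n_layers, dim].
    (Assumed layer choice; the paper studies which layers to use.)"""
    return torch.stack([hidden_states[l][:, 0, :] for l in layers], dim=1)

def pool_to_single_vector(mlr, strategy="mean"):
    """Reduce the multi-vector representation to a single vector (assumed strategies)."""
    if strategy == "mean":
        return mlr.mean(dim=1)
    if strategy == "max":
        return mlr.max(dim=1).values
    raise ValueError(f"unknown pooling strategy: {strategy}")

def relevance(query_vec, doc_vec):
    """Single-vector dense-retrieval scoring via dot product."""
    return (query_vec * doc_vec).sum(dim=-1)
```

With a Hugging Face encoder, the per-layer states would typically come from `model(**inputs, output_hidden_states=True).hidden_states`; how the paper actually selects layers and pools them is described in the publication linked above.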