% Reichman and Heck, "Dense Passage Retrieval: Is it Retrieving?", Findings of ACL: EMNLP 2024.
@inproceedings{reichman-heck-2024-dense,
  author    = {Reichman, Benjamin and
               Heck, Larry},
  title     = {Dense Passage Retrieval: Is it Retrieving?},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {13540--13553},
  doi       = {10.18653/v1/2024.findings-emnlp.791},
  url       = {https://aclanthology.org/2024.findings-emnlp.791/},
  abstract  = {Large Language Models (LLMs) internally store repositories of knowledge. However, their access to this repository is imprecise and they frequently hallucinate information that is not true or does not exist. A paradigm called Retrieval Augmented Generation (RAG) promises to fix these issues. Dense passage retrieval (DPR) is the first step in this paradigm. In this paper, we analyze the role of DPR fine-tuning and how it affects the model being trained. DPR fine-tunes pre-trained networks to enhance the alignment of the embeddings between queries and relevant textual data. We explore DPR-trained models mechanistically by using a combination of probing, layer activation analysis, and model editing. Our experiments show that DPR training decentralizes how knowledge is stored in the network, creating multiple access pathways to the same information. We also uncover a limitation in this training style: the internal knowledge of the pre-trained model bounds what the retrieval model can retrieve. These findings suggest a few possible directions for dense retrieval: (1) expose the DPR training process to more knowledge so more can be decentralized, (2) inject facts as decentralized representations, (3) model and incorporate knowledge uncertainty in the retrieval process, and (4) directly map internal model knowledge to a knowledge base.},
}
Markdown (Informal)
[Dense Passage Retrieval: Is it Retrieving?](https://aclanthology.org/2024.findings-emnlp.791/) (Reichman & Heck, Findings 2024)
ACL
- Benjamin Reichman and Larry Heck. 2024. Dense Passage Retrieval: Is it Retrieving? In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 13540–13553, Miami, Florida, USA. Association for Computational Linguistics.