% Conference-paper record for "SURE or Not? Investigating Semantic Understanding
% in Dense Retrieval Models" (Kong, Chen, He, Sun), ACL 2026 long papers.
%
% Notes for maintainers:
%   - Acronyms that must survive style-driven sentence casing are brace-protected
%     as whole words (SURE in the title; the society name in the booktitle).
%   - The abstract emphasises the letters forming the acronym with \emph rather
%     than math mode, so they are typeset as text italics, not math variables.
%   - NOTE(review): the url points at a preview/ingest host, which looks like a
%     staging location; swap in the canonical Anthology URL once it is confirmed.
%   - NOTE(review): address carries the conference venue as exported; some styles
%     expect the publisher's city here. Confirm against the target style.
%   - Any plain text after the closing brace is leftover from the web export.
%     BibTeX ignores text outside entries, so it is kept but moved off the
%     brace line.
@inproceedings{kong-etal-2026-sure,
  title     = {{SURE} or Not? Investigating Semantic Understanding in Dense Retrieval Models},
  author    = {Kong, Lingdi and
               Chen, Xuanang and
               He, Ben and
               Sun, Le},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.2127/},
  pages     = {45873--45887},
  isbn      = {979-8-89176-390-6},
  abstract  = {Dense retrieval has become a core technique in applications like web search and retrieval-augmented generation. Despite their empirical success, it remains unclear whether these models truly understand semantics. To address this gap, this paper conducts a systematic investigation by introducing \emph{SURE}, a benchmark for \emph{S}emantic \emph{U}nderstanding in dense \emph{RE}trieval built upon the MSMARCO, NQ, and FiQA datasets. SURE characterizes semantic understanding in dense retrieval along three dimensions: semantic precision, semantic abstraction, and semantic equivalence. We evaluate ten representative models ranging from 110M to 8B parameters, including both general-purpose and domain-specific models. Results show that current dense retrievers struggle to distinguish fine-grained semantic differences across texts with varying information density, and to recognize semantic consistency under lexical paraphrasing. Moreover, larger models do not necessarily exhibit stronger semantic understanding, and diverse training data generally enhances semantic understanding on challenging retrieval tasks.},
}
Markdown (Informal)
[SURE or Not? Investigating Semantic Understanding in Dense Retrieval Models](https://preview.aclanthology.org/ingest-acl/2026.acl-long.2127/) (Kong et al., ACL 2026)
ACL