@inproceedings{lu-etal-2025-pathway,
    title     = "Pathway to Relevance: How Cross-Encoders Implement a Semantic Variant of {BM25}",
    author    = "Lu, Meng and
                 Chen, Catherine and
                 Eickhoff, Carsten",
    editor    = "Christodoulopoulos, Christos and
                 Chakraborty, Tanmoy and
                 Rose, Carolyn and
                 Peng, Violet",
    booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
    month     = nov,
    year      = "2025",
    address   = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.emnlp-main.1297/",
    pages     = "25536--25558",
    isbn      = "979-8-89176-332-6",
    abstract  = "Mechanistic interpretation has greatly contributed to a more detailed understanding of generative language models, enabling significant progress in identifying structures that implement key behaviors through interactions between internal components. In contrast, interpretability in information retrieval (IR) remains relatively coarse-grained, and much is still unknown as to how IR models determine whether a document is relevant to a query. In this work, we address this gap by mechanistically analyzing how one commonly used model, a cross-encoder, estimates relevance. We find that the model extracts traditional relevance signals, such as term frequency and inverse document frequency, in early-to-middle layers. These concepts are then combined in later layers, similar to the well-known probabilistic ranking function, BM25. Overall, our analysis offers a more nuanced understanding of how IR models compute relevance. Isolating these components lays the groundwork for future interventions that could enhance transparency, mitigate safety risks, and improve scalability."
}
Markdown (Informal)
[Pathway to Relevance: How Cross-Encoders Implement a Semantic Variant of BM25](https://aclanthology.org/2025.emnlp-main.1297/) (Lu et al., EMNLP 2025)
ACL