% Bibliographic record for the NEAT-IR conference paper (authors, volume editors, venue, pages, abstract).
% NOTE(review): the url field points at a preview/ingest host of the ACL Anthology; confirm the canonical URL before relying on it.
@inproceedings{sukherman-etal-2026-neat,
  title     = {{NEAT}-{IR}: Neural Explainable Analysis Tool for Information Retrieval},
  author    = {Sukherman, Lev and
               Frenk, Artem and
               Klimenkova, Nina and
               Jason, Connor},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-srw.21/},
  pages     = {240--246},
  isbn      = {979-8-89176-393-7},
  abstract  = {Neural IR models achieve strong performance but remain difficult to interpret. We present NEAT-IR, a black-box analysis framework that explains ColBERT{'}s ranking behavior using 26 classical IR features (BM25, TF-IDF, IDF measures, positional signals). We analyze ColBERT through two complementary lenses: regression (predicting exact scores) and learning-to-rank (predicting relative order), evaluated on MS MARCO (48,250 query-passage pairs). Our key finding is a score-rank gap: classical features preserve ColBERT{'}s rankings nearly perfectly ($NDCG@5 \approx 0.99$) yet explain only $R^2 \approx 0.28$ of score variance. Feature attribution reveals that regression and ranking models rely on distinct feature subsets: query-level IDF signals dominate score prediction, while document-matching features (BM25, cosine TF-IDF) drive ranking preservation. These findings suggest that ColBERT{'}s ordinal behavior on MS MARCO is largely recoverable from classical signals, while neural contributions primarily affect score magnitude. NEAT-IR enables practitioners to diagnose when neural rankers deviate from classical patterns, supporting interpretable model auditing and informed hybrid pipeline design.}
}

Markdown (Informal)
[NEAT-IR: Neural Explainable Analysis Tool for Information Retrieval](https://preview.aclanthology.org/ingest-acl/2026.acl-srw.21/) (Sukherman et al., ACL 2026)
ACL
- Lev Sukherman, Artem Frenk, Nina Klimenkova, and Connor Jason. 2026. NEAT-IR: Neural Explainable Analysis Tool for Information Retrieval. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026), pages 240–246, San Diego, California, United States. Association for Computational Linguistics.