% NEAT-IR (Sukherman et al., 2026): paper in the ACL 2026 Student Research
% Workshop volume, Anthology ID 2026.acl-srw.21.
% The url field uses the permanent ACL Anthology address derived from that ID;
% the exported record pointed at a temporary preview build of the site.
% NOTE(review): confirm the permanent page is live before relying on the link.
@inproceedings{sukherman-etal-2026-neat,
  author    = {Sukherman, Lev and
               Frenk, Artem and
               Klimenkova, Nina and
               Jason, Connor},
  title     = {{NEAT-IR}: Neural Explainable Analysis Tool for Information Retrieval},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 4: Student Research Workshop)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {240--246},
  isbn      = {979-8-89176-393-7},
  url       = {https://aclanthology.org/2026.acl-srw.21/},
  abstract  = {Neural IR models achieve strong performance but remain difficult to interpret. We present NEAT-IR, a black-box analysis framework that explains ColBERT's ranking behavior using 26 classical IR features (BM25, TF-IDF, IDF measures, positional signals). We analyze ColBERT through two complementary lenses: regression (predicting exact scores) and learning-to-rank (predicting relative order), evaluated on MS MARCO (48,250 query-passage pairs). Our key finding is a score-rank gap: classical features preserve ColBERT's rankings nearly perfectly ($\mathrm{NDCG}@5 \approx 0.99$) yet explain only $R^2 \approx 0.28$ of score variance. Feature attribution reveals that regression and ranking models rely on distinct feature subsets: query-level IDF signals dominate score prediction, while document-matching features (BM25, cosine TF-IDF) drive ranking preservation. These findings suggest that ColBERT's ordinal behavior on MS MARCO is largely recoverable from classical signals, while neural contributions primarily affect score magnitude. NEAT-IR enables practitioners to diagnose when neural rankers deviate from classical patterns, supporting interpretable model auditing and informed hybrid pipeline design.},
}

Markdown (Informal)
[NEAT-IR: Neural Explainable Analysis Tool for Information Retrieval](https://aclanthology.org/2026.acl-srw.21/) (Sukherman et al., ACL 2026)
ACL
- Lev Sukherman, Artem Frenk, Nina Klimenkova, and Connor Jason. 2026. NEAT-IR: Neural Explainable Analysis Tool for Information Retrieval. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 240–246, San Diego, California, United States. Association for Computational Linguistics.