@inproceedings{zhu-2024-fast,
title = "Fast Exact Retrieval for Nearest-neighbor Lookup ({FERN})",
author = "Zhu, Richard",
editor = "Cao, Yang (Trista) and
Papadimitriou, Isabel and
Ovalle, Anaelia and
Zampieri, Marcos and
Ferraro, Francis and
Swayamdipta, Swabha",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.naacl-srw.6/",
doi = "10.18653/v1/2024.naacl-srw.6",
pages = "42--47",
abstract = "Exact nearest neighbor search is a computationally intensive process, and even its simpler sibling {---} vector retrieval {---} can be computationally complex. This is exacerbated when retrieving vectors which have high-dimension $d$ relative to the number of vectors, $N$, in the database. Exact nearest neighbor retrieval has been generally acknowledged to be a $O(Nd)$ problem with no sub-linear solutions. Attention has instead shifted towards Approximate Nearest-Neighbor (ANN) retrieval techniques, many of which have sub-linear or even logarithmic time complexities. However, if our intuition from binary search problems (e.g. $d=1$ vector retrieval) carries, there ought to be a way to retrieve an organized representation of vectors without brute-forcing our way to a solution. For low dimension (e.g. $d=2$ or $d=3$ cases), kd-trees provide a $O(d\log N)$ algorithm for retrieval. Unfortunately the algorithm deteriorates rapidly to a $O(dN)$ solution at high dimensions (e.g. $k=128$), in practice. We propose a novel algorithm for logarithmic Fast Exact Retrieval for Nearest-neighbor lookup (FERN), inspired by kd-trees. The algorithm achieves $O(d\log N)$ look-up with 100{\%} recall on 10 million $d=128$ uniformly randomly generated vectors."
}
Markdown (Informal)
[Fast Exact Retrieval for Nearest-neighbor Lookup (FERN)](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.naacl-srw.6/) (Zhu, NAACL 2024)
ACL
- Richard Zhu. 2024. Fast Exact Retrieval for Nearest-neighbor Lookup (FERN). In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop), pages 42–47, Mexico City, Mexico. Association for Computational Linguistics.