@comment{
  Kulkarni et al., SIGIR-AP 2025 conference paper.
  Acronyms and proper nouns in title/booktitle are brace-protected so that
  sentence-casing bibliography styles keep their capitalization.
  The percent sign in the abstract is escaped so the field is safe to typeset.
}
@inproceedings{Kulkarni2025SIGIRAP,
  author    = {Kulkarni, Hrishikesh and MacAvaney, Sean and Goharian, Nazli and Frieder, Ophir},
  title     = {On the Interplay Between Graph Quality, Traversal Strategies, and Performance of {ANN} Retrieval Methods},
  booktitle = {Proceedings of the 2025 Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval in the {Asia Pacific} Region ({SIGIR-AP} 2025)},
  series    = {SIGIR-AP '25},
  year      = {2025},
  location  = {Xi'an, China},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {979-8-4007-2218-9},
  doi       = {10.1145/3767695.3769495},
  url       = {https://doi.org/10.1145/3767695.3769495},
  abstract  = {State-of-the-art approximate nearest neighbor (ANN) methods like HNSW and LADR use document-document proximity graphs (also known as corpus graphs) to identify relevant documents efficiently. Complete graph construction latency (though built offline) has a quadratic time complexity of the number of documents, which is a major hurdle when scaling these methods. Graph approximations are popular ways to reduce the computational cost of building such corpus graphs. However, approximations come with a cost, namely, a lower quality of corpus graphs. Hence, there is a practical need to understand the tradeoffs between a corpus graph's quality and its effectiveness when used with various ANN methods; in other words, how 'approximate' can a corpus graph be while maintaining strong retrieval effectiveness? We construct approximate (i.e. poorer quality) corpus graphs using various methods and present extensive experiments that analyze the robustness and performance of popular ANN methods on these graphs. Our analysis is performed on multiple datasets, with different parameters and various poor graph simulation strategies. We also analyze different graph traversal approaches for robust and efficient retrieval across graphs of poor quality. We conclude by addressing the utility of these approaches at the billion-scale, practical scenarios by optimizing graph construction and graph traversal stages. We show that robust ANN methods like Adaptive LADR show statistically equivalent performance on poor quality graphs while saving 33\% graph construction time.},
}