{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T02:43:03Z","timestamp":1770345783073,"version":"3.49.0"},"reference-count":101,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T00:00:00Z","timestamp":1765929600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T00:00:00Z","timestamp":1765929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"crossref","award":["2023YFB4503600"],"award-info":[{"award-number":["2023YFB4503600"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["World Wide Web"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s11280-025-01389-1","type":"journal-article","created":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T10:42:02Z","timestamp":1765968122000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["BraveANN: Robust Approximate Nearest Neighbor Search for Billion-Scale Vectors"],"prefix":"10.1007","volume":"29","author":[{"given":"Shengkun","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Yiming","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Jinshan","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Sheng","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yuan","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yuhui","family":"Lai","sequence":"additional","affiliation":[]},{"given":"Zhiyong","family":"Peng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,17]]},"reference":[{"issue":"1","key":"1389_CR1","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1109\/TKDE.2013.109","volume":"26","author":"X Wu","year":"2014","unstructured":"Wu, X., Zhu, X., Wu, G., Ding, W.: Data mining with big data. IEEE Trans. Knowl. Data Eng. 26(1), 97\u2013107 (2014)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"6582","key":"1389_CR2","doi-asserted-by":"publisher","first-page":"1780","DOI":"10.1126\/science.abg1780","volume":"375","author":"R Nathan","year":"2022","unstructured":"Nathan, R., Monk, C.T., Arlinghaus, R., Adam, T., Al\u00f3s, J., Assaf, M., Baktoft, H., Beardsworth, C.E., Bertram, M.G., Bijleveld, A.I., et al.: Big-data approaches lead to an increased understanding of the ecology of animal movement. Science 375(6582), 1780 (2022)","journal-title":"Science"},{"key":"1389_CR3","doi-asserted-by":"crossref","unstructured":"Jindal, A., Qiao, S., Sen, R., Patel, H.: Microlearner: A fine-grained learning optimizer for big data workloads at microsoft. In: ICDE, pp. 2423\u20132434 (2021)","DOI":"10.1109\/ICDE51399.2021.00275"},{"issue":"12","key":"1389_CR4","doi-asserted-by":"publisher","first-page":"1986","DOI":"10.14778\/3352063.3352116","volume":"12","author":"F Nargesian","year":"2019","unstructured":"Nargesian, F., Zhu, E., Miller, R.J., Pu, K.Q., Arocena, P.C.: Data lake management: challenges and opportunities. Proc. VLDB Endow. 12(12), 1986\u20131989 (2019)","journal-title":"Proc. VLDB Endow."},{"issue":"7","key":"1389_CR5","doi-asserted-by":"publisher","first-page":"1920","DOI":"10.1109\/TKDE.2015.2427795","volume":"27","author":"H Zhang","year":"2015","unstructured":"Zhang, H., Chen, G., Ooi, B.C., Tan, K., Zhang, M.: In-memory big data management and processing: a survey. IEEE Trans. Knowl. Data Eng. 27(7), 1920\u20131948 (2015)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"11","key":"1389_CR6","doi-asserted-by":"publisher","first-page":"2385","DOI":"10.1109\/TIP.2009.2025923","volume":"18","author":"MP Sampat","year":"2009","unstructured":"Sampat, M.P., Wang, Z., Gupta, S., Bovik, A.C., Markey, M.K.: Complex wavelet structural similarity: a new image similarity index. IEEE Trans. Image Process. 18(11), 2385\u20132401 (2009)","journal-title":"IEEE Trans. Image Process."},{"issue":"2","key":"1389_CR7","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1145\/201040.201041","volume":"13","author":"VN Gudivada","year":"1995","unstructured":"Gudivada, V.N., Raghavan, V.V.: Design and evaluation of algorithms for image retrieval by spatial similarity. ACM Trans. Inf. Syst. 13(2), 115\u2013144 (1995)","journal-title":"ACM Trans. Inf. Syst."},{"issue":"6","key":"1389_CR8","doi-asserted-by":"publisher","first-page":"1505","DOI":"10.1109\/JSTSP.2022.3188113","volume":"16","author":"S Chen","year":"2022","unstructured":"Chen, S., Wang, C., Chen, Z., Wu, Y., Liu, S., Chen, Z., Li, J., Kanda, N., Yoshioka, T., Xiao, X., Wu, J., Zhou, L., Ren, S., Qian, Y., Qian, Y., Wu, J., Zeng, M., Yu, X., Wei, F.: Wavlm: Large-scale self-supervised pre-training for full stack speech processing. IEEE J. Sel. Top. Signal Process. 16(6), 1505\u20131518 (2022)","journal-title":"IEEE J. Sel. Top. Signal Process."},{"key":"1389_CR9","unstructured":"Le, M., Vyas, A., Shi, B., Karrer, B., Sari, L., Moritz, R., Williamson, M., Manohar, V., Adi, Y., Mahadeokar, J., Hsu, W.: Voicebox: Text-guided multilingual universal speech generation at scale. In: NeurIPS (2023)"},{"issue":"2","key":"1389_CR10","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1145\/3440755","volume":"54","author":"D Chandrasekaran","year":"2022","unstructured":"Chandrasekaran, D., Mago, V.: Evolution of semantic similarity - A survey. ACM Comput. Surv. 54(2), 41\u201314137 (2022)","journal-title":"ACM Comput. Surv."},{"key":"1389_CR11","doi-asserted-by":"crossref","unstructured":"Zhou, K., Ethayarajh, K., Card, D., Jurafsky, D.: Problems with cosine as a measure of embedding similarity for high frequency words. In: ACL, pp. 401\u2013423 (2022)","DOI":"10.18653\/v1\/2022.acl-short.45"},{"key":"1389_CR12","doi-asserted-by":"crossref","unstructured":"Tao, Y., Yi, K., Sheng, C., Kalnis, P.: Quality and efficiency in high dimensional nearest neighbor search. In: SIGMOD, pp. 563\u2013576 (2009)","DOI":"10.1145\/1559845.1559905"},{"key":"1389_CR13","doi-asserted-by":"crossref","unstructured":"Seidl, T., Kriegel, H.: Optimal multi-step k-nearest neighbor search. In: SIGMOD, pp. 154\u2013165 (1998)","DOI":"10.1145\/276304.276319"},{"key":"1389_CR14","doi-asserted-by":"crossref","unstructured":"Li, C., Zhang, M., Andersen, D.G., He, Y.: Improving approximate nearest neighbor search through learned adaptive early termination. In: SIGMOD, pp. 2539\u20132554 (2020)","DOI":"10.1145\/3318464.3380600"},{"issue":"2","key":"1389_CR15","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1145\/3589282","volume":"1","author":"J Gao","year":"2023","unstructured":"Gao, J., Long, C.: High-dimensional approximate nearest neighbor search: with reliable and efficient distance comparison operations. Proc. ACM Manag. Data. 1(2), 137\u2013113727 (2023)","journal-title":"Proc. ACM Manag. Data."},{"issue":"12","key":"1389_CR16","doi-asserted-by":"publisher","first-page":"3548","DOI":"10.14778\/3554821.3554843","volume":"15","author":"R Guo","year":"2022","unstructured":"Guo, R., Luan, X., Xiang, L., Yan, X., Yi, X., Luo, J., Cheng, Q., Xu, W., Luo, J., Liu, F., Cao, Z., Qiao, Y., Wang, T., Tang, B., Xie, C.: Manu: A cloud native vector database management system. Proc. VLDB Endow. 15(12), 3548\u20133561 (2022)","journal-title":"Proc. VLDB Endow."},{"key":"1389_CR17","unstructured":"Ren, J., Zhang, M., Li, D.: HM-ANN: efficient billion-point nearest neighbor search on heterogeneous memory. In: NeurIPS (2020)"},{"key":"1389_CR18","doi-asserted-by":"crossref","unstructured":"Wang, J., Yi, X., Guo, R., Jin, H., Xu, P., Li, S., Wang, X., Guo, X., Li, C., Xu, X., Yu, K., Yuan, Y., Zou, Y., Long, J., Cai, Y., Li, Z., Zhang, Z., Mo, Y., Gu, J., Jiang, R., Wei, Y., Xie, C.: Milvus: A purpose-built vector data management system. In: SIGMOD, pp. 2614\u20132627 (2021)","DOI":"10.1145\/3448016.3457550"},{"key":"1389_CR19","doi-asserted-by":"crossref","unstructured":"Yang, W., Li, T., Fang, G., Wei, H.: PASE: postgresql ultra-high-dimensional approximate nearest neighbor search extension. In: SIGMOD, pp. 2241\u20132253 (2020)","DOI":"10.1145\/3318464.3386131"},{"issue":"14","key":"1389_CR20","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.14778\/2556549.2556574","volume":"6","author":"N Sundaram","year":"2013","unstructured":"Sundaram, N., Turmukhametova, A., Satish, N., Mostak, T., Indyk, P., Madden, S., Dubey, P.: Streaming similarity search over one billion tweets using parallel locality-sensitive hashing. Proc. VLDB Endow. 6(14), 1930\u20131941 (2013)","journal-title":"Proc. VLDB Endow."},{"issue":"12","key":"1389_CR21","doi-asserted-by":"publisher","first-page":"3152","DOI":"10.14778\/3415478.3415541","volume":"13","author":"C Wei","year":"2020","unstructured":"Wei, C., Wu, B., Wang, S., Lou, R., Zhan, C., Li, F., Cai, Y.: Analyticdb-v: A hybrid analytical engine towards query fusion for structured and unstructured data. Proc. VLDB Endow. 13(12), 3152\u20133165 (2020)","journal-title":"Proc. VLDB Endow."},{"issue":"8","key":"1389_CR22","doi-asserted-by":"publisher","first-page":"906","DOI":"10.14778\/3204028.3204034","volume":"11","author":"A Arora","year":"2018","unstructured":"Arora, A., Sinha, S., Kumar, P., Bhattacharya, A.: Hd-index: Pushing the scalability-accuracy boundary for approximate knn search in high-dimensional spaces. Proc. VLDB Endow. 11(8), 906\u2013919 (2018)","journal-title":"Proc. VLDB Endow."},{"key":"1389_CR23","doi-asserted-by":"crossref","unstructured":"Datar, M., Immorlica, N., Indyk, P., Mirrokni, V.S.: Locality-sensitive hashing scheme based on p-stable distributions. In: SCG, pp. 253\u2013262 (2004)","DOI":"10.1145\/997817.997857"},{"issue":"4","key":"1389_CR24","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2018.2889473","volume":"42","author":"YA Malkov","year":"2020","unstructured":"Malkov, Y.A., Yashunin, D.A.: Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE Trans. Pattern Anal. Mach. Intell. 42(4), 824\u2013836 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"1389_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.14778\/2735461.2735462","volume":"8","author":"Y Sun","year":"2014","unstructured":"Sun, Y., Wang, W., Qin, J., Zhang, Y., Lin, X.: SRS: solving c-approximate nearest neighbor queries in high dimensional euclidean space with a tiny index. Proc. VLDB Endow. 8(1), 1\u201312 (2014)","journal-title":"Proc. VLDB Endow."},{"key":"1389_CR26","doi-asserted-by":"crossref","unstructured":"Zhao, W., Tan, S., Li, P.: SONG: approximate nearest neighbor search on GPU. In: ICDE, pp. 1033\u20131044 (2020)","DOI":"10.1109\/ICDE48307.2020.00094"},{"key":"1389_CR27","unstructured":"Jayaram\u00a0Subramanya, S., Devvrit, F., Simhadri, H.V., Krishnawamy, R., Kadekodi, R.: Diskann: Fast accurate billion-point nearest neighbor search on a single node. In: NeurIPS (2019)"},{"key":"1389_CR28","unstructured":"Chen, Q., Zhao, B., Wang, H., Li, M., Liu, C., Li, Z., Yang, M., Wang, J.: SPANN: highly-efficient billion-scale approximate nearest neighborhood search. In: NeurIPS, pp. 5199\u20135212 (2021)"},{"key":"1389_CR29","doi-asserted-by":"crossref","unstructured":"Xu, Y., Liang, H., Li, J., Xu, S., Chen, Q., Zhang, Q., Li, C., Yang, Z., Yang, F., Yang, Y., Cheng, P., Yang, M.: Spfresh: Incremental in-place update for billion-scale vector search. In: SOSP, pp. 545\u2013561 (2023)","DOI":"10.1145\/3600006.3613166"},{"issue":"1","key":"1389_CR30","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/TPAMI.2010.57","volume":"33","author":"H J\u00e9gou","year":"2011","unstructured":"J\u00e9gou, H., Douze, M., Schmid, C.: Product quantization for nearest neighbor search. IEEE Trans. Pattern Anal. Mach. Intell. 33(1), 117\u2013128 (2011)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"1389_CR31","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2021","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with gpus. IEEE Trans. Big Data. 7(3), 535\u2013547 (2021)","journal-title":"IEEE Trans. Big Data."},{"key":"1389_CR32","doi-asserted-by":"crossref","unstructured":"Baranchuk, D., Babenko, A., Malkov, Y.: Revisiting the inverted indices for billion-scale approximate nearest neighbors. In: ECCV, vol. 11216, pp. 209\u2013224 (2018)","DOI":"10.1007\/978-3-030-01258-8_13"},{"issue":"2","key":"1389_CR33","doi-asserted-by":"publisher","first-page":"163","DOI":"10.14778\/3425879.3425887","volume":"14","author":"S Wang","year":"2020","unstructured":"Wang, S., Sun, Y., Bao, Z.: On the efficiency of k-means clustering: evaluation, optimization, and algorithm selection. Proc. VLDB Endow. 14(2), 163\u2013175 (2020)","journal-title":"Proc. VLDB Endow."},{"key":"1389_CR34","doi-asserted-by":"crossref","unstructured":"Liu, H., Huang, Z., Chen, Q., Li, M., Fu, Y., Zhang, L.: Fast clustering with flexible balance constraints. In: IEEE Big Data, pp. 743\u2013750 (2018)","DOI":"10.1109\/BigData.2018.8621917"},{"key":"1389_CR35","unstructured":"Grunau, C., Rozhon, V.: Adapting k-means algorithms for outliers. In: ICML, vol. 162, pp. 7845\u20137886 (2022)"},{"key":"1389_CR36","unstructured":"Huang, J., Feng, Q., Huang, Z., Xu, J., Wang, J.: Near-linear time approximation algorithms for k-means with outliers. In: ICML (2024)"},{"key":"1389_CR37","unstructured":"General Data Protection Regulation. https:\/\/en.wikipedia.org\/wiki\/General_Data_Protection_Regulation (2018)"},{"key":"1389_CR38","doi-asserted-by":"crossref","unstructured":"Davis, J.V., Kulis, B., Jain, P., Sra, S., Dhillon, I.S.: Information-theoretic metric learning. In: ICML, vol. 227, pp. 209\u2013216 (2007)","DOI":"10.1145\/1273496.1273523"},{"issue":"11","key":"1389_CR39","doi-asserted-by":"publisher","first-page":"2765","DOI":"10.1109\/TPAMI.2013.57","volume":"35","author":"E Elhamifar","year":"2013","unstructured":"Elhamifar, E., Vidal, R.: Sparse subspace clustering: algorithm, theory, and applications. IEEE Trans. Pattern Anal. Mach. Intell. 35(11), 2765\u20132781 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1389_CR40","unstructured":"Yi, J., Jin, R., Jain, A.K., Jain, S., Yang, T.: Semi-crowdsourced clustering: Generalizing crowd labeling by robust distance metric learning. In: NeurIPS, pp. 1781\u20131789 (2012)"},{"key":"1389_CR41","doi-asserted-by":"crossref","unstructured":"Ding, C.H.Q., Zhou, D., He, X., Zha, H.: R$$_{\\text{1}}$$-pca: rotational invariant L$$_{\\text{1 }}$$-norm principal component analysis for robust subspace factorization. In: ICML, vol. 148, pp. 281\u2013288 (2006)","DOI":"10.1145\/1143844.1143880"},{"key":"1389_CR42","unstructured":"Liu, G., Lin, Z., Yu, Y.: Robust subspace segmentation by low-rank representation. In: ICML, pp. 663\u2013670 (2010)"},{"key":"1389_CR43","first-page":"142","volume":"24","author":"T Li","year":"2023","unstructured":"Li, T., Beirami, A., Sanjabi, M., Smith, V.: On tilted losses in machine learning: theory and applications. J. Mach. Learn. Res. 24, 142\u2013114279 (2023)","journal-title":"J. Mach. Learn. Res."},{"key":"1389_CR44","doi-asserted-by":"crossref","unstructured":"Butler, R.W.: Saddlepoint Approximations with Applications, (2007)","DOI":"10.1017\/CBO9780511619083"},{"key":"1389_CR45","doi-asserted-by":"crossref","unstructured":"Siegmund, D.: Importance sampling in the monte carlo study of sequential tests. Ann. Stat., 673\u2013684 (1976)","DOI":"10.1214\/aos\/1176343541"},{"issue":"4","key":"1389_CR46","doi-asserted-by":"publisher","first-page":"1257","DOI":"10.1111\/rssb.12510","volume":"84","author":"R Tang","year":"2022","unstructured":"Tang, R., Yang, Y.: Bayesian inference for risk minimization via exponentially tilted empirical likelihood. J. R. Stat. Soc. B. 84(4), 1257\u20131286 (2022)","journal-title":"J. R. Stat. Soc. B."},{"key":"1389_CR47","doi-asserted-by":"crossref","unstructured":"Dembo, A., Zeitouni, O.: Large Deviations Techniques and Applications, (2009)","DOI":"10.1007\/978-3-642-03311-7"},{"issue":"11","key":"1389_CR48","doi-asserted-by":"publisher","first-page":"6749","DOI":"10.1109\/TIT.2014.2351393","volume":"60","author":"N Merhav","year":"2014","unstructured":"Merhav, N.: List decoding - random coding exponents and expurgated exponents. IEEE Trans. Inf. Theory 60(11), 6749\u20136759 (2014)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"5","key":"1389_CR49","doi-asserted-by":"publisher","first-page":"2850","DOI":"10.1109\/TIT.2018.2879477","volume":"65","author":"A Beirami","year":"2019","unstructured":"Beirami, A., Calderbank, A.R., Christiansen, M.M., Duffy, K.R., M\u00e9dard, M.: A characterization of guesswork on swiftly tilting curves. IEEE Trans. Inf. Theory 65(5), 2850\u20132871 (2019)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"2","key":"1389_CR50","doi-asserted-by":"publisher","first-page":"390","DOI":"10.1007\/s10957-010-9759-1","volume":"148","author":"EY Pee","year":"2011","unstructured":"Pee, E.Y., Royset, J.O.: On solving large-scale finite minimax problems using exponential smoothing. J. Optim. Theory Appl. 148(2), 390\u2013421 (2011)","journal-title":"J. Optim. Theory Appl."},{"issue":"12","key":"1389_CR51","doi-asserted-by":"publisher","first-page":"2216","DOI":"10.1109\/TPAMI.2010.47","volume":"32","author":"C Shen","year":"2010","unstructured":"Shen, C., Li, H.: On the dual formulation of boosting algorithms. IEEE Trans. Pattern Anal. Mach. Intell. 32(12), 2216\u20132231 (2010)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1389_CR52","unstructured":"Puranik, B., Beirami, A., Qin, Y., Madhow, U.: Improving robustness via tilted exponential layer: A communication-theoretic perspective. In: AISTATS, vol. 238, pp. 4510\u20134518 (2024)"},{"key":"1389_CR53","unstructured":"Wang, Y., Chen, H., Liu, W., He, F., Gong, T., Fu, Y., Tao, D.: Tilted sparse additive models. In: ICML, vol. 202, pp. 35579\u201335604 (2023)"},{"issue":"2","key":"1389_CR54","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1109\/TIT.1982.1056489","volume":"28","author":"SP Lloyd","year":"1982","unstructured":"Lloyd, S.P.: Least squares quantization in PCM. IEEE Trans. Inf. Theory 28(2), 129\u2013136 (1982)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1389_CR55","doi-asserted-by":"crossref","unstructured":"Nocedal, J., Wright, S.J.: Numerical Optimization, (1999)","DOI":"10.1007\/b98874"},{"key":"1389_CR56","unstructured":"Chen, K.: A constant factor approximation algorithm for k-median clustering with outliers. In: SODA, pp. 826\u2013835 (2008)"},{"key":"1389_CR57","doi-asserted-by":"crossref","unstructured":"Krishnaswamy, R., Li, S., Sandeep, S.: Constant approximation for k-median and k-means with outliers via iterative rounding. In: STOC, pp. 646\u2013659 (2018)","DOI":"10.1145\/3188745.3188882"},{"key":"1389_CR58","unstructured":"Charikar, M., Khuller, S., Mount, D.M., Narasimhan, G.: Algorithms for facility location problems with outliers. In: SODA, pp. 642\u2013651 (2001)"},{"issue":"1\u20133","key":"1389_CR59","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1023\/B:MACH.0000033115.78247.f0","volume":"56","author":"A Meyerson","year":"2004","unstructured":"Meyerson, A., O\u2019Callaghan, L., Plotkin, S.A.: A k-median algorithm with running time independent of data size. Mach. Learn. 56(1\u20133), 61\u201387 (2004)","journal-title":"Mach. Learn."},{"issue":"7","key":"1389_CR60","doi-asserted-by":"publisher","first-page":"757","DOI":"10.14778\/3067421.3067425","volume":"10","author":"S Gupta","year":"2017","unstructured":"Gupta, S., Kumar, R., Lu, K., Moseley, B., Vassilvitskii, S.: Local search methods for k-means with outliers. Proc. VLDB Endow. 10(7), 757\u2013768 (2017)","journal-title":"Proc. VLDB Endow."},{"issue":"3","key":"1389_CR61","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/3322808","volume":"6","author":"S Guha","year":"2019","unstructured":"Guha, S., Li, Y., Zhang, Q.: Distributed partial clustering. ACM Trans. Parallel Comput. 6(3), 11\u201311120 (2019)","journal-title":"ACM Trans. Parallel Comput."},{"key":"1389_CR62","unstructured":"Bhaskara, A., Vadgama, S., Xu, H.: Greedy sampling for approximate clustering in the presence of outliers. In: NeurIPS, pp. 11146\u201311155 (2019)"},{"key":"1389_CR63","unstructured":"Im, S., Qaem, M.M., Moseley, B., Sun, X., Zhou, R.: Fast noise removal for k-means clustering. In: AISTATS, vol. 108, pp. 456\u2013466 (2020)"},{"key":"1389_CR64","doi-asserted-by":"crossref","unstructured":"He, J., Liu, W., Chang, S.: Scalable similarity search with optimized kernel hashing. In: KDD, pp. 1129\u20131138 (2010)","DOI":"10.1145\/1835804.1835946"},{"key":"1389_CR65","doi-asserted-by":"crossref","unstructured":"Kulis, B., Grauman, K.: Kernelized locality-sensitive hashing for scalable image search. In: ICCV, pp. 2130\u20132137 (2009)","DOI":"10.1109\/ICCV.2009.5459466"},{"key":"1389_CR66","unstructured":"Raginsky, M., Lazebnik, S.: Locality-sensitive binary codes from shift-invariant kernels. In: NeurIPS, pp. 1509\u20131517 (2009)"},{"issue":"12","key":"1389_CR67","doi-asserted-by":"publisher","first-page":"2393","DOI":"10.1109\/TPAMI.2012.48","volume":"34","author":"J Wang","year":"2012","unstructured":"Wang, J., Kumar, S., Chang, S.: Semi-supervised hashing for large-scale search. IEEE Trans. Pattern Anal. Mach. Intell. 34(12), 2393\u20132406 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"6","key":"1389_CR68","doi-asserted-by":"publisher","first-page":"1247","DOI":"10.1109\/TPAMI.2014.2361319","volume":"37","author":"A Babenko","year":"2015","unstructured":"Babenko, A., Lempitsky, V.S.: The inverted multi-index. IEEE Trans. Pattern Anal. Mach. Intell. 37(6), 1247\u20131260 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1389_CR69","doi-asserted-by":"crossref","unstructured":"J\u00e9gou, H., Tavenard, R., Douze, M., Amsaleg, L.: Searching in one billion vectors: Re-rank with source coding. In: ICASSP, pp. 861\u2013864 (2011)","DOI":"10.1109\/ICASSP.2011.5946540"},{"key":"1389_CR70","doi-asserted-by":"crossref","unstructured":"Zhang, M., He, Y.: GRIP: multi-store capacity-optimized high-performance nearest neighbor search for vector search engine. In: CIKM, pp. 1673\u20131682 (2019)","DOI":"10.1145\/3357384.3357938"},{"key":"1389_CR71","doi-asserted-by":"crossref","unstructured":"Azizi, I., Echihabi, K., Palpanas, T.: Graph-based vector search: an experimental evaluation of the state-of-the-art. Proc. ACM Manag. Data. 3(1) (2025)","DOI":"10.1145\/3709693"},{"issue":"8","key":"1389_CR72","doi-asserted-by":"publisher","first-page":"1475","DOI":"10.1109\/TKDE.2019.2909204","volume":"32","author":"W Li","year":"2020","unstructured":"Li, W., Zhang, Y., Sun, Y., Wang, W., Li, M., Zhang, W., Lin, X.: Approximate nearest neighbor search on high dimensional data - experiments, analyses, and improvement. IEEE Trans. Knowl. Data Eng. 32(8), 1475\u20131488 (2020)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"5","key":"1389_CR73","doi-asserted-by":"publisher","first-page":"461","DOI":"10.14778\/3303753.3303754","volume":"12","author":"C Fu","year":"2019","unstructured":"Fu, C., Xiang, C., Wang, C., Cai, D.: Fast approximate nearest neighbor search with the navigating spreading-out graph. Proc. VLDB Endow. 12(5), 461\u2013474 (2019)","journal-title":"Proc. VLDB Endow."},{"issue":"8","key":"1389_CR74","first-page":"4139","volume":"44","author":"C Fu","year":"2022","unstructured":"Fu, C., Wang, C., Cai, D.: High dimensional similarity search with satellite system graph: efficiency, scalability, and unindexed query compatibility. IEEE Trans. Pattern Anal. Mach. Intell. 44(8), 4139\u20134150 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1389_CR75","doi-asserted-by":"crossref","unstructured":"Kung, P.P., Fan, Z., Zhao, T., Liu, Y., Lai, Z., Shi, J., Wu, Y., Yu, J., Shah, N., Venkataraman, G.: Improving embedding-based retrieval in friend recommendation with ANN query expansion. In: SIGIR, pp. 2930\u20132934 (2024)","DOI":"10.1145\/3626772.3661367"},{"key":"1389_CR76","doi-asserted-by":"crossref","unstructured":"Demmel, J.W.: Applied Numerical Linear Algebra, (1997)","DOI":"10.1137\/1.9781611971446"},{"key":"1389_CR77","unstructured":"Arthur, D., Vassilvitskii, S.: K-means++ the advantages of careful seeding. In: SODA, pp. 1027\u20131035 (2007)"},{"key":"1389_CR78","unstructured":"Cover, T.M.: Elements of Information Theory, (1999)"},{"key":"1389_CR79","doi-asserted-by":"crossref","unstructured":"Nesterov, Y.E.: Introductory Lectures on Convex Optimization - A Basic Course vol. 87, (2004)","DOI":"10.1007\/978-1-4419-8853-9"},{"issue":"2","key":"1389_CR80","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1137\/16M1080173","volume":"60","author":"L Bottou","year":"2018","unstructured":"Bottou, L., Curtis, F.E., Nocedal, J.: Optimization methods for large-scale machine learning. SIAM Rev. 60(2), 223\u2013311 (2018)","journal-title":"SIAM Rev."},{"issue":"4","key":"1389_CR81","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1007\/BF01456804","volume":"71","author":"H Weyl","year":"1912","unstructured":"Weyl, H.: Das asymptotische verteilungsgesetz der eigenwerte linearer partieller differentialgleichungen (mit einer anwendung auf die theorie der hohlraumstrahlung). Math. Ann. 71(4), 441\u2013479 (1912)","journal-title":"Math. Ann."},{"issue":"1\u20132","key":"1389_CR82","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/s10107-016-1017-3","volume":"161","author":"M Wang","year":"2017","unstructured":"Wang, M., Fang, E.X., Liu, H.: Stochastic compositional gradient descent: algorithms for minimizing compositions of expected-value functions. Math. Program. 161(1\u20132), 419\u2013449 (2017)","journal-title":"Math. Program."},{"key":"1389_CR83","unstructured":"Qi, Q., Xu, Y., Yin, W., Jin, R., Yang, T.: Attentional-biased stochastic gradient descent. Trans. Mach. Learn. Res. (2023)"},{"key":"1389_CR84","first-page":"105","volume":"18","author":"M Wang","year":"2017","unstructured":"Wang, M., Liu, J., Fang, E.X.: Accelerating stochastic composition optimization. J. Mach. Learn. Res. 18, 105\u2013110523 (2017)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"1389_CR85","doi-asserted-by":"publisher","first-page":"960","DOI":"10.1137\/18M1230542","volume":"30","author":"S Ghadimi","year":"2020","unstructured":"Ghadimi, S., Ruszczynski, A., Wang, M.: A single timescale stochastic approximation method for nested stochastic optimization. SIAM J. Optim. 30(1), 960\u2013979 (2020)","journal-title":"SIAM J. Optim."},{"key":"1389_CR86","unstructured":"Chen, Q., Wang, H., Li, M., Ren, G., Li, S., Zhu, J., Li, J., Liu, C., Zhang, L., Wang, J.: SPTAG: A library for fast approximate nearest neighbor search. (2018). https:\/\/github.com\/Microsoft\/SPTAG"},{"key":"1389_CR87","unstructured":"Datasets for approximate nearest neighbor search. http:\/\/corpus-texmex.irisa.fr\/ (2017)"},{"issue":"11","key":"1389_CR88","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"1389_CR89","unstructured":"Xiao, H., Rasul, K., Vollgraf, R.: Fashion-mnist: a novel image dataset for benchmarking machine learning algorithms. arXiv:1708.07747 (2017)"},{"key":"1389_CR90","unstructured":"Benchmarking nearest neighbors. (2019). http:\/\/ann-benchmarks.com\/"},{"key":"1389_CR91","doi-asserted-by":"crossref","unstructured":"Babenko, A., Lempitsky, V.S.: Efficient indexing of billion-scale datasets of deep descriptors. In: CVPR, pp. 2055\u20132063 (2016)","DOI":"10.1109\/CVPR.2016.226"},{"key":"1389_CR92","unstructured":"Deshpande, A., Kacham, P., Pratap, R.: Robust k-means++. In: Proceedings of the Thirty-Sixth Conference on Uncertainty in Artificial Intelligence, UAI 2020, Virtual Online, August 3-6, 2020, vol. 124, pp. 799\u2013808 (2020)"},{"issue":"12","key":"1389_CR93","doi-asserted-by":"publisher","first-page":"12181","DOI":"10.1109\/TKDE.2022.3159580","volume":"35","author":"Z Li","year":"2023","unstructured":"Li, Z., Zhao, Y., Hu, X., Botta, N., Ionescu, C., Chen, G.H.: ECOD: unsupervised outlier detection using empirical cumulative distribution functions. IEEE Trans. Knowl. Data Eng. 35(12), 12181\u201312193 (2023)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"1389_CR94","doi-asserted-by":"crossref","unstructured":"Liu, F.T., Ting, K.M., Zhou, Z.: Isolation forest. In: ICDM, pp. 413\u2013422 (2008)","DOI":"10.1109\/ICDM.2008.17"},{"key":"1389_CR95","unstructured":"Sugiyama, M., Borgwardt, K.M.: Rapid distance-based outlier detection via sampling. In: NeurIPS, pp. 467\u2013475 (2013)"},{"key":"1389_CR96","unstructured":"Guo, R., Sun, P., Lindgren, E., Geng, Q., Simcha, D., Chern, F., Kumar, S.: Accelerating large-scale inference with anisotropic vector quantization. In: ICML, vol. 119, pp. 3887\u20133896 (2020)"},{"key":"1389_CR97","doi-asserted-by":"crossref","unstructured":"Dong, W., Charikar, M., Li, K.: Efficient k-nearest neighbor graph construction for generic similarity measures. In: WWW, pp. 577\u2013586 (2011)","DOI":"10.1145\/1963405.1963487"},{"key":"1389_CR98","unstructured":"Neighborhood Graph and Tree for Indexing High-dimensional Data. (2018). https:\/\/github.com\/yahoojapan\/NGT"},{"key":"1389_CR99","unstructured":"Graph Library for Approximate Similarity Search. (2021). https:\/\/github.com\/zilliztech\/pyglass"},{"issue":"1","key":"1389_CR100","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1145\/3639269","volume":"2","author":"M Wang","year":"2024","unstructured":"Wang, M., Xu, W., Yi, X., Wu, S., Peng, Z., Ke, X., Gao, Y., Xu, X., Guo, R., Xie, C.: Starling: An i\/o-efficient disk-resident graph index framework for high-dimensional vector similarity search on data segment. Proc. ACM Manag. Data. 2(1), 2\u20130141201427 (2024)","journal-title":"Proc. ACM Manag. Data."},{"key":"1389_CR101","doi-asserted-by":"crossref","unstructured":"Lin, W., He, Z., Xiao, M.: Balanced clustering: A uniform model and fast algorithm. In: IJCAI, pp. 2987\u20132993 (2019)","DOI":"10.24963\/ijcai.2019\/414"}],"container-title":["World Wide Web"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-025-01389-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11280-025-01389-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-025-01389-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T13:03:39Z","timestamp":1770296619000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11280-025-01389-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,17]]},"references-count":101,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["1389"],"URL":"https:\/\/doi.org\/10.1007\/s11280-025-01389-1","relation":{},"ISSN":["1386-145X","1573-1413"],"issn-type":[{"value":"1386-145X","type":"print"},{"value":"1573-1413","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,17]]},"assertion":[{"value":"17 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests as defined by Springer or other interests that might be perceived to influence the results and\/or discussion reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics Approval and Consent to Participate"}},{"value":"Our manuscript is approved by all authors for publication.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for Publication"}},{"value":"The authors declare no competing interests.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"8"}}