{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:33:20Z","timestamp":1772724800462,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":78,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key R&D Program of China","award":["2022YFB4501401"],"award-info":[{"award-number":["2022YFB4501401"]}]},{"name":"National Natural Science Foundation of China (NSFC)","award":["62222210"],"award-info":[{"award-number":["62222210"]}]},{"name":"National Natural Science Foundation of China (NSFC)","award":["62072297"],"award-info":[{"award-number":["62072297"]}]},{"name":"National Natural Science Foundation of China (NSFC)","award":["U21B2017"],"award-info":[{"award-number":["U21B2017"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620665.3640360","type":"proceedings-article","created":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T14:18:06Z","timestamp":1713795486000},"page":"549-565","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["JUNO: Optimizing High-Dimensional Approximate Nearest Neighbour Search with Sparsity-Aware Algorithm and Ray-Tracing Core Mapping"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0874-0682","authenticated-orcid":false,"given":"Zihan","family":"Liu","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0169-0692","authenticated-orcid":false,"given":"Wentao","family":"Ni","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5660-5493","authenticated-orcid":false,"given":"Jingwen","family":"Leng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2192-5737","authenticated-orcid":false,"given":"Yu","family":"Feng","sequence":"additional","affiliation":[{"name":"University of Rochester, Rochester, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4479-5525","authenticated-orcid":false,"given":"Cong","family":"Guo","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5832-0347","authenticated-orcid":false,"given":"Quan","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6218-4659","authenticated-orcid":false,"given":"Chao","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-2302","authenticated-orcid":false,"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qi Zhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2802-0578","authenticated-orcid":false,"given":"Yuhao","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of Rochester, Rochester, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT47387.2019.00019"},{"key":"e_1_3_2_1_2_1","volume-title":"Knn-diffusion: Image generation via large-scale retrieval. CoRR, abs\/2204.02849","author":"Ashual Oron","year":"2022","unstructured":"Oron Ashual, Shelly Sheynin, Adam Polyak, Uriel Singer, Oran Gafni, Eliya Nachmani, and Yaniv Taigman. Knn-diffusion: Image generation via large-scale retrieval. CoRR, abs\/2204.02849, 2022."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.124"},{"key":"e_1_3_2_1_4_1","volume-title":"2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016","author":"Babenko Artem","year":"2016","unstructured":"Artem Babenko and Victor S. Lempitsky. Efficient indexing of billion-scale datasets of deep descriptors. In 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, June 27-30, 2016. IEEE Computer Society, 2016."},{"key":"e_1_3_2_1_5_1","volume-title":"Eighth ACM Conference on Recommender Systems, RecSys '14","author":"Bachrach Yoram","year":"2014","unstructured":"Yoram Bachrach, Yehuda Finkelstein, Ran Gilad-Bachrach, Liran Katzir, Noam Koenigstein, Nir Nice, and Ulrich Paquet. Speeding up the xbox recommender system using a euclidean transformation for inner-product spaces. In Alfred Kobsa, Michelle X. Zhou, Martin Ester, and Yehuda Koren, editors, Eighth ACM Conference on Recommender Systems, RecSys '14, Foster City, Silicon Valley, CA, USA - October 06 - 10, 2014. ACM, 2014."},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Baranchuk Dmitry","year":"2019","unstructured":"Dmitry Baranchuk, Dmitry Persiyanov, Anton Sinitsin, and Artem Babenko. Learning to route in similarity graphs. In International Conference on Machine Learning. PMLR, 2019."},{"key":"e_1_3_2_1_7_1","volume-title":"1997 Conference on Computer Vision and Pattern Recognition (CVPR '97)","author":"Jeffrey","year":"1997","unstructured":"Jeffrey S. Beis and David G. Lowe. Shape indexing using approximate nearest-neighbour search in high-dimensional spaces. In 1997 Conference on Computer Vision and Pattern Recognition (CVPR '97), June 17-19, 1997, San Juan, Puerto Rico. IEEE Computer Society, 1997."},{"key":"e_1_3_2_1_8_1","volume-title":"Unlimiformer: Long-range transformers with unlimited length input. CoRR, abs\/2305.01625","author":"Bertsch Amanda","year":"2023","unstructured":"Amanda Bertsch, Uri Alon, Graham Neubig, and Matthew R. Gormley. Unlimiformer: Long-range transformers with unlimited length input. CoRR, abs\/2305.01625, 2023."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10070940"},{"key":"e_1_3_2_1_10_1","first-page":"5199","article-title":"Highly-efficient billion-scale approximate nearest neighborhood search","volume":"34","author":"Chen Qi","year":"2021","unstructured":"Qi Chen, Bing Zhao, Haidong Wang, Mingqin Li, Chuanjie Liu, Zengzhong Li, Mao Yang, and Jingdong Wang. Spann: Highly-efficient billion-scale approximate nearest neighborhood search. Advances in Neural Information Processing Systems, 34:5199--5212, 2021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557098"},{"key":"e_1_3_2_1_12_1","volume-title":"A survey on deep neural network pruning-taxonomy, comparison, analysis, and recommendations. CoRR, abs\/2308.06767","author":"Cheng Hongrong","year":"2023","unstructured":"Hongrong Cheng, Miao Zhang, and Javen Qinfeng Shi. A survey on deep neural network pruning-taxonomy, comparison, analysis, and recommendations. CoRR, abs\/2308.06767, 2023."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020578"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/997817.997857"},{"key":"e_1_3_2_1_15_1","first-page":"2017","article-title":"Classification of region of interest in mammograms using dual contourlet transform and improved KNN","author":"Dong Min","year":"2017","unstructured":"Min Dong, Zhe Wang, Chenghui Dong, Xiaomin Mu, and Yide Ma. Classification of region of interest in mammograms using dual contourlet transform and improved KNN. J. Sensors, 2017, 2017.","journal-title":"J. Sensors"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1963405.1963487"},{"key":"e_1_3_2_1_17_1","volume-title":"Comparison of nearest-neighbor-search strategies and implementations for efficient shape registration. Journal of Software Engineering for Robotics (JOSER), 3(1)","author":"Elseberg J.","year":"2012","unstructured":"J. Elseberg, S. Magnenat, R. Siegwart, and A. N\u00fcchter. Comparison of nearest-neighbor-search strategies and implementations for efficient shape registration. Journal of Software Engineering for Robotics (JOSER), 3(1), 2012."},{"key":"e_1_3_2_1_18_1","volume-title":"7th International Conference on Learning Representations, ICLR 2019","author":"Frankle Jonathan","year":"2019","unstructured":"Jonathan Frankle and Michael Carbin. The lottery ticket hypothesis: Finding sparse, trainable neural networks. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net, 2019."},{"key":"e_1_3_2_1_19_1","series-title":"Proceedings of Machine Learning Research","volume-title":"International Conference on Machine Learning, ICML","author":"Frantar Elias","year":"2023","unstructured":"Elias Frantar and Dan Alistarh. Sparsegpt: Massive language models can be accurately pruned in one-shot. In Andreas Krause, Emma Brun-skill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett, editors, International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA, volume 202 of Proceedings of Machine Learning Research. PMLR, 2023."},{"key":"e_1_3_2_1_20_1","volume-title":"Fast approximate nearest neighbor search with the navigating spreading-out graph. arXiv preprint arXiv:1707.00143","author":"Fu Cong","year":"2017","unstructured":"Cong Fu, Chao Xiang, Changxu Wang, and Deng Cai. Fast approximate nearest neighbor search with the navigating spreading-out graph. arXiv preprint arXiv:1707.00143, 2017."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.14778\/3303753.3303754"},{"key":"e_1_3_2_1_22_1","volume-title":"High-dimensional approximate nearest neighbor search: with reliable and efficient distance comparison operations. CoRR, abs\/2303.09855","author":"Gao Jianyang","year":"2023","unstructured":"Jianyang Gao and Cheng Long. High-dimensional approximate nearest neighbor search: with reliable and efficient distance comparison operations. CoRR, abs\/2303.09855, 2023."},{"key":"e_1_3_2_1_23_1","volume-title":"Optimized product quantization","author":"Ge Tiezheng","year":"2013","unstructured":"Tiezheng Ge, Kaiming He, Qifa Ke, and Jian Sun. Optimized product quantization. IEEE transactions on pattern analysis and machine intelligence, 36(4), 2013."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2022.3161156"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2020","author":"Guo Cong","year":"2020","unstructured":"Cong Guo, Bo Yang Hsueh, Jingwen Leng, Yuxian Qiu, Yue Guan, Zehuan Wang, Xiaoying Jia, Xipeng Li, Minyi Guo, and Yuhao Zhu. Accelerating sparse DNN models without hardware-support via tile-wise sparsity. In Christine Cuicchi, Irene Qualters, and William T. Kramer, editors, Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2020, Virtual Event \/ Atlanta, Georgia, USA, November 9-19, 2020. IEEE\/ACM, 2020."},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA 2023","author":"Guo Cong","year":"2023","unstructured":"Cong Guo, Jiaming Tang, Weiming Hu, Jingwen Leng, Chen Zhang, Fan Yang, Yunxin Liu, Minyi Guo, and Yuhao Zhu. Olive: Accelerating large language models via hardware-friendly outlier-victim pair quantization. In Yan Solihin and Mark A. Heinrich, editors, Proceedings of the 50th Annual International Symposium on Computer Architecture, ISCA 2023, Orlando, FL, USA, June 17-21, 2023. ACM, 2023."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00095"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218732"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event, volume 119 of Proceedings of Machine Learning Research. PMLR","author":"Guo Ruiqi","year":"2020","unstructured":"Ruiqi Guo, Philip Sun, Erik Lindgren, Quan Geng, David Simcha, Felix Chern, and Sanjiv Kumar. Accelerating large-scale inference with anisotropic vector quantization. In Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event, volume 119 of Proceedings of Machine Learning Research. PMLR, 2020."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.5555\/3349354"},{"key":"e_1_3_2_1_31_1","volume-title":"Twenty-Second International Joint Conference on Artificial Intelligence","author":"Hajebi Kiana","year":"2011","unstructured":"Kiana Hajebi, Yasin Abbasi-Yadkori, Hossein Shahbazi, and Hong Zhang. Fast approximate nearest-neighbor search with k-nearest neighbor graph. In Twenty-Second International Joint Conference on Artificial Intelligence, 2011."},{"key":"e_1_3_2_1_32_1","first-page":"22","article-title":"Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks","author":"Hoefler Torsten","year":"2021","unstructured":"Torsten Hoefler, Dan Alistarh, Tal Ben-Nun, Nikoli Dryden, and Alexandra Peste. Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks. J. Mach. Learn. Res., 22, 2021.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_33_1","volume-title":"Optimization of indexing based on k-nearest neighbor graph for proximity search in high-dimensional data. arXiv preprint arXiv:1810.07355","author":"Iwasaki Masajiro","year":"2018","unstructured":"Masajiro Iwasaki and Daisuke Miyazaki. Optimization of indexing based on k-nearest neighbor graph for proximity search in high-dimensional data. arXiv preprint arXiv:1810.07355, 2018."},{"key":"e_1_3_2_1_34_1","first-page":"32","article-title":"Fast accurate billion-point nearest neighbor search on a single node","author":"Subramanya Suhas Jayaram","year":"2019","unstructured":"Suhas Jayaram Subramanya, Fnu Devvrit, Harsha Vardhan Simhadri, Ravishankar Krishnawamy, and Rohan Kadekodi. Diskann: Fast accurate billion-point nearest neighbor search on a single node. Advances in Neural Information Processing Systems, 32, 2019.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_35_1","volume-title":"Product quantization for nearest neighbor search","author":"Jegou Herve","year":"2010","unstructured":"Herve Jegou, Matthijs Douze, and Cordelia Schmid. Product quantization for nearest neighbor search. IEEE transactions on pattern analysis and machine intelligence, 33(1), 2010."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5946540"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_38_1","volume-title":"8th International Conference on Learning Representations, ICLR 2020","author":"Kitaev Nikita","year":"2020","unstructured":"Nikita Kitaev, Lukasz Kaiser, and Anselm Levskaya. Reformer: The efficient transformer. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenReview.net, 2020."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00518"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the Twenty-Ninth Annual ACM Symposium on the Theory of Computing","author":"Kleinberg Jon M.","year":"1997","unstructured":"Jon M. Kleinberg. Two algorithms for nearest-neighbor search in high dimensions. In Frank Thomson Leighton and Peter W. Shor, editors, Proceedings of the Twenty-Ninth Annual ACM Symposium on the Theory of Computing, El Paso, Texas, USA, May 4-6, 1997. ACM, 1997."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00021"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the 2020 International Conference on Management of Data, SIGMOD Conference 2020, online conference [Portland, OR, USA]","author":"Li Conglong","year":"2020","unstructured":"Conglong Li, Minjia Zhang, David G. Andersen, and Yuxiong He. Improving approximate nearest neighbor search through learned adaptive early termination. In David Maier, Rachel Pottinger, AnHai Doan, Wang-Chiew Tan, Abdussalam Alawini, and Hung Q. Ngo, editors, Proceedings of the 2020 International Conference on Management of Data, SIGMOD Conference 2020, online conference [Portland, OR, USA], June 14-19, 2020. ACM, 2020."},{"key":"e_1_3_2_1_43_1","first-page":"608","article-title":"A new fast inverted file-based algorithm for approximate nearest neighbor search without accuracy reduction","author":"Liu Yuchen","year":"2022","unstructured":"Yuchen Liu, Zhibin Pan, Liangzhuang Wang, and Yang Wang. A new fast inverted file-based algorithm for approximate nearest neighbor search without accuracy reduction. Inf. Sci., 608, 2022.","journal-title":"Inf. Sci."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507752"},{"key":"e_1_3_2_1_45_1","volume-title":"Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs","author":"Malkov Yu A","year":"2018","unstructured":"Yu A Malkov and Dmitry A Yashunin. Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE transactions on pattern analysis and machine intelligence, 42(4), 2018."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4842-7185-8"},{"key":"e_1_3_2_1_47_1","volume-title":"Indexing 1g vectors. https:\/\/github.com\/facebookresearch\/faiss\/wiki\/Indexing-1G-vectors","year":"2023","unstructured":"Meta. Indexing 1g vectors. https:\/\/github.com\/facebookresearch\/faiss\/wiki\/Indexing-1G-vectors, 2023."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2321376"},{"key":"e_1_3_2_1_49_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. Deep learning recommendation model for personalization and recommendation systems. CoRR abs\/1906.00091 2019."},{"key":"e_1_3_2_1_50_1","unstructured":"NVIDIA. Nvidia optix\u2122 ray tracing engine. https:\/\/developer.nvidia.com\/rtx\/ray-tracing\/optix."},{"key":"e_1_3_2_1_51_1","volume-title":"Nvidia turing gpu architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/design-visualization\/technologies\/turing-architecture\/NVIDIA-Turing-Architecture-Whitepaper.pdf","author":"NVIDIA.","year":"2018","unstructured":"NVIDIA. Nvidia turing gpu architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/design-visualization\/technologies\/turing-architecture\/NVIDIA-Turing-Architecture-Whitepaper.pdf, 2018."},{"key":"e_1_3_2_1_52_1","volume-title":"Nvidia ampere ga102 gpu architecture. https:\/\/www.nvidia.com\/content\/PDF\/nvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.pdf","author":"NVIDIA.","year":"2021","unstructured":"NVIDIA. Nvidia ampere ga102 gpu architecture. https:\/\/www.nvidia.com\/content\/PDF\/nvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.pdf, 2021."},{"key":"e_1_3_2_1_53_1","volume-title":"Multi-process service. https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. Multi-process service. https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf, 2022."},{"key":"e_1_3_2_1_54_1","volume-title":"Nvidia ada craft the engineering marvel of the rtx 4090. https:\/\/images.nvidia.com\/aem-dam\/Solutions\/geforce\/ada\/ada-lovelace-architecture\/nvidia-ada-gpu-craft.pdf","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. Nvidia ada craft the engineering marvel of the rtx 4090. https:\/\/images.nvidia.com\/aem-dam\/Solutions\/geforce\/ada\/ada-lovelace-architecture\/nvidia-ada-gpu-craft.pdf, 2022."},{"key":"e_1_3_2_1_55_1","volume-title":"Basic linear algebra on nvidia gpus. https:\/\/developer.nvidia.com\/cublas","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. Basic linear algebra on nvidia gpus. https:\/\/developer.nvidia.com\/cublas, 2023."},{"key":"e_1_3_2_1_56_1","volume-title":"Nvidia ada gpu architecture. https:\/\/images.nvidia.com\/aem-dam\/Solutions\/Data-Center\/l4\/nvidia-ada-gpu-architecture-whitepaper-v2.0.pdf","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. Nvidia ada gpu architecture. https:\/\/images.nvidia.com\/aem-dam\/Solutions\/Data-Center\/l4\/nvidia-ada-gpu-architecture-whitepaper-v2.0.pdf, 2023."},{"key":"e_1_3_2_1_57_1","volume-title":"Nvidia tensor cores unprecedented acceleration for hpc and ai. https:\/\/www.nvidia.com\/en-us\/data-center\/tensor-cores\/","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. Nvidia tensor cores unprecedented acceleration for hpc and ai. https:\/\/www.nvidia.com\/en-us\/data-center\/tensor-cores\/, 2023."},{"key":"e_1_3_2_1_58_1","volume-title":"GPT-4 technical report. CoRR, abs\/2303.08774","author":"AI.","year":"2023","unstructured":"OpenAI. GPT-4 technical report. CoRR, abs\/2303.08774, 2023."},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming, PPoPP 2023","author":"Peng Zhen","year":"2023","unstructured":"Zhen Peng, Minjia Zhang, Kai Li, Ruoming Jin, and Bin Ren. iqan: Fast and accurate vector search with efficient intra-query parallelism on multi-core architectures. In Maryam Mehri Dehnavi, Milind Kulkarni, and Sriram Krishnamoorthy, editors, Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming, PPoPP 2023, Montreal, QC, Canada, 25 February 2023 - 1 March 2023. ACM, 2023."},{"key":"e_1_3_2_1_60_1","volume-title":"Proceedings of the 15th International Conference on Data Engineering","author":"Pramanik Sakti","year":"1999","unstructured":"Sakti Pramanik and Jinhua Li. Fast approximate search algorithm for nearest neighbor queries in high dimensions. In Masaru Kitsuregawa, Michael P. Papazoglou, and Calton Pu, editors, Proceedings of the 15th International Conference on Data Engineering, Sydney, Australia, March 23-26, 1999. IEEE Computer Society, 1999."},{"key":"e_1_3_2_1_61_1","volume-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017","author":"Qi Charles Ruizhongtai","year":"2017","unstructured":"Charles Ruizhongtai Qi, Hao Su, Kaichun Mo, and Leonidas J. Guibas. Pointnet: Deep learning on point sets for 3d classification and segmentation. In 2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, Honolulu, HI, USA, July 21-26, 2017. IEEE Computer Society, 2017."},{"issue":"140","key":"e_1_3_2_1_62_1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of Machine Learning Research, 21(140):1--67, 2020.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_63_1","volume-title":"Benchmarks for billion-scale similarity search. https:\/\/research.yandex.com\/blog\/benchmarks-for-billion-scale-similarity-search","author":"Research Yandex","year":"2021","unstructured":"Yandex Research. Benchmarks for billion-scale similarity search. https:\/\/research.yandex.com\/blog\/benchmarks-for-billion-scale-similarity-search, 2021."},{"key":"e_1_3_2_1_64_1","volume-title":"Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014","author":"Shrivastava Anshumali","year":"2014","unstructured":"Anshumali Shrivastava and Ping Li. Asymmetric LSH (ALSH) for sublinear time maximum inner product search (MIPS). In Zoubin Ghahramani, Max Welling, Corinna Cortes, Neil D. Lawrence, and Kilian Q. Weinberger, editors, Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada, 2014."},{"key":"e_1_3_2_1_65_1","unstructured":"Harsha Vardhan Simhadri Ravishankar Krishnaswamy Gopal Srinivasa Suhas Jayaram Subramanya Andrija Antonijevic Dax Pryce David Kaczynski Shane Williams Siddarth Gollapudi Varun Sivashankar Neel Karia Aditi Singh Shikhar Jaiswal Neelam Mahapatro Philip Adams Bryan Tower and Yash Patel."},{"key":"e_1_3_2_1_66_1","volume-title":"Llama: Open and efficient foundation language models. CoRR, abs\/2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, Aur\u00e9lien Rodriguez, Armand Joulin, Edouard Grave, and Guillaume Lample. Llama: Open and efficient foundation language models. CoRR, abs\/2302.13971, 2023."},{"key":"e_1_3_2_1_67_1","volume-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. Attention is all you need. In Isabelle Guyon, Ulrike von Luxburg, Samy Bengio, Hanna M. Wallach, Rob Fergus, S. V. N. Vishwanathan, and Roman Garnett, editors, Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, USA, 2017."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/RT.2006.280216"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247790"},{"issue":"8","key":"e_1_3_2_1_70_1","doi-asserted-by":"crossref","DOI":"10.1109\/TPAMI.2005.165","article-title":"On the euclidean distance of images","volume":"27","author":"Wang Liwei","year":"2005","unstructured":"Liwei Wang, Yan Zhang, and Jufu Feng. On the euclidean distance of images. IEEE Trans. Pattern Anal. Mach. Intell., 27(8), 2005.","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476255"},{"key":"e_1_3_2_1_72_1","volume-title":"WWW '21: The Web Conference 2021","author":"Wang Ruoxi","year":"2021","unstructured":"Ruoxi Wang, Rakesh Shivanna, Derek Zhiyuan Cheng, Sagar Jain, Dong Lin, Lichan Hong, and Ed H. Chi. DCN V2: improved deep & cross network and practical lessons for web-scale learning to rank systems. In Jure Leskovec, Marko Grobelnik, Marc Najork, Jie Tang, and Leila Zia, editors, WWW '21: The Web Conference 2021, Virtual Event \/ Ljubljana, Slovenia, April 19-23, 2021. ACM \/ IW3C2, 2021."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00985"},{"key":"e_1_3_2_1_74_1","volume-title":"Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019","author":"Xu Tiancheng","year":"2019","unstructured":"Tiancheng Xu, Boyuan Tian, and Yuhao Zhu. Tigris: Architecture and algorithms for 3d perception in point clouds. In Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 2019, Columbus, OH, USA, October 12-16, 2019. ACM, 2019."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00517"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00064"},{"key":"e_1_3_2_1_77_1","volume-title":"Proceedings of the 20th ACM Multimedia Conference, MM '12","author":"Zhou Wengang","year":"2012","unstructured":"Wengang Zhou, Yijuan Lu, Houqiang Li, and Qi Tian. Scalar quantization for large scale image search. In Noboru Babaguchi, Kiyoharu Aizawa, John R. Smith, Shin'ichi Satoh, Thomas Plagemann, Xian-Sheng Hua, and Rong Yan, editors, Proceedings of the 20th ACM Multimedia Conference, MM '12, Nara, Japan, October 29 - November 02, 2012. ACM, 2012."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503221.3508409"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640360","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620665.3640360","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:41Z","timestamp":1750291421000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640360"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":78,"alternative-id":["10.1145\/3620665.3640360","10.1145\/3620665"],"URL":"https:\/\/doi.org\/10.1145\/3620665.3640360","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}