{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T14:44:57Z","timestamp":1773153897871,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3581784.3607062","type":"proceedings-article","created":{"date-parts":[[2023,10,30]],"date-time":"2023-10-30T20:34:48Z","timestamp":1698698088000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["Parallel Top-K Algorithms on GPU: A Comprehensive Study and New Methods"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7829-7348","authenticated-orcid":false,"given":"Jingrong","family":"Zhang","sequence":"first","affiliation":[{"name":"NVIDIA, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3140-0854","authenticated-orcid":false,"given":"Akira","family":"Naruse","sequence":"additional","affiliation":[{"name":"NVIDIA, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9606-1599","authenticated-orcid":false,"given":"Xipeng","family":"Li","sequence":"additional","affiliation":[{"name":"NVIDIA, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0906-8778","authenticated-orcid":false,"given":"Yong","family":"Wang","sequence":"additional","affiliation":[{"name":"NVIDIA, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2023,11,11]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"4","article-title":"Fast k-selection algorithms for graphics processing units","volume":"17","author":"Alabi Tolu","year":"2012","unstructured":"Tolu Alabi, Jeffrey D Blanchard, Bradley Gordon, and Russel Steinbach. 2012. Fast k-selection algorithms for graphics processing units. Journal of Experimental Algorithmics (JEA) 17 (2012), 4--1.","journal-title":"Journal of Experimental Algorithmics (JEA)"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2019.02.006"},{"key":"e_1_3_2_2_3_1","volume-title":"Efficient Indexing of Billion-Scale Datasets of Deep Descriptors. In 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016","author":"Babenko Artem","year":"2016","unstructured":"Artem Babenko and Victor S. Lempitsky. 2016. Efficient Indexing of Billion-Scale Datasets of Deep Descriptors. In 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, June 27--30, 2016. IEEE Computer Society, Las Vegas, 2055--2063."},{"key":"e_1_3_2_2_4_1","volume-title":"Heap based k-nearest neighbor search on GPUs. Congreso Espanol de Inform\u00e1tica (CEDI) 1, 7 (01","author":"Barrientos Ricardo","year":"2010","unstructured":"Ricardo Barrientos, J.I. Gomez, Christian Tenllado, and Manuel Prieto Matias. 2010. Heap based k-nearest neighbor search on GPUs. Congreso Espanol de Inform\u00e1tica (CEDI) 1, 7 (01 2010), 559--566."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-017-2110-y"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Nathan Bell and Jared Hoberock. 2012. Chapter 26 - Thrust: A Productivity-Oriented Library for CUDA. In GPU Computing Gems Jade Edition Wen mei W. Hwu (Ed.). Morgan Kaufmann Boston 359--371.","DOI":"10.1016\/B978-0-12-385963-1.00026-5"},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining (Melbourne VIC, Australia) (WSDM '19)","author":"Chen Minmin","unstructured":"Minmin Chen, Alex Beutel, Paul Covington, Sagar Jain, Francois Belletti, and Ed H. Chi. 2019. Top-K Off-Policy Correction for a REINFORCE Recommender System. In Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining (Melbourne VIC, Australia) (WSDM '19). Association for Computing Machinery, New York, NY, USA, 456--464."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS49909.2020.9220622"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0074113"},{"key":"e_1_3_2_2_10_1","unstructured":"Meta Platforms Inc. 2022. Faiss v1.7.3. Meta Platforms Inc. Retrieved March 1 2023 from https:\/\/github.com\/facebookresearch\/faiss"},{"key":"e_1_3_2_2_11_1","unstructured":"Anil Gaihre. 2022. Anil-Gaihre\/DrTopKSC. https:\/\/github.com\/Anil-Gaihre\/DrTopKSC"},{"key":"e_1_3_2_2_12_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (St","author":"Gaihre Anil","year":"2021","unstructured":"Anil Gaihre, Da Zheng, Scott Weitze, Lingda Li, Shuaiwen Leon Song, Caiwen Ding, Xiaoye S. Li, and Hang Liu. 2021. Dr. Top-k: Delegate-Centric Top-k on GPUs. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (St. Louis, Missouri) (SC '21). Association for Computing Machinery, New York, NY, USA, Article 39, 14 pages."},{"key":"e_1_3_2_2_13_1","volume-title":"Accelerating high-throughput virtual screening through molecular pool-based active learning. Chemical science 12, 22","author":"Graff David E","year":"2021","unstructured":"David E Graff, Eugene I Shakhnovich, and Connor W Coley. 2021. Accelerating high-throughput virtual screening through molecular pool-based active learning. Chemical science 12, 22 (2021), 7866--7881."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPA.2009.89"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.57"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0092409"},{"key":"e_1_3_2_2_18_1","volume-title":"6th International Conference on Learning Representations, ICLR","author":"Lin Yujun","year":"2018","unstructured":"Yujun Lin, Song Han, Huizi Mao, Yu Wang, and Bill Dally. 2018. Deep Gradient Compression: Reducing the Communication Bandwidth for Distributed Training. In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. OpenReview.net, Vancouver, BC, Canada, 14 pages. https:\/\/openreview.net\/forum?id=SkhQHMW0W"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2020.06.052"},{"key":"e_1_3_2_2_20_1","unstructured":"Duane Merrill. 2015. Cub. https:\/\/on-demand.gputechconf.com\/gtc\/2015\/presentation\/S5617-Duane-Merrill.pdf"},{"key":"e_1_3_2_2_21_1","unstructured":"Nvidia. 2020. NVIDIA A100 Tensor Core Gpu. https:\/\/www.nvidia.com\/en-us\/data-center\/a100\/"},{"key":"e_1_3_2_2_22_1","unstructured":"Nvidia. 2021. NVIDIA A10 Tensor Core Gpu. https:\/\/www.nvidia.com\/en-us\/data-center\/products\/a10-gpu\/"},{"key":"e_1_3_2_2_23_1","unstructured":"Nvidia. 2022. CUDA 12.0 Release Notes. https:\/\/docs.nvidia.com\/cuda\/archive\/12.0.0\/cuda-toolkit-release-notes\/index.html"},{"key":"e_1_3_2_2_24_1","unstructured":"NVIDIA. 2023. CUDA C++ Best Practices Guide. https:\/\/docs.nvidia.com\/cuda\/cuda-c-best-practices-guide\/"},{"key":"e_1_3_2_2_25_1","unstructured":"Nvidia. 2023. NVIDIA H100 Tensor Core Gpu. https:\/\/www.nvidia.com\/en-us\/data-center\/h100\/"},{"key":"e_1_3_2_2_26_1","unstructured":"NVIDIA. 2023. NVIDIA Kernel Profiling Guide. https:\/\/docs.nvidia.com\/nsight-compute\/ProfilingGuide\/index.html"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3323165.3323198"},{"key":"e_1_3_2_2_28_1","unstructured":"Rapidsai. 2022. Rapidsai\/raft: RAFT contains fundamental widely-used algorithms and primitives for data science Graph and machine learning. https:\/\/github.com\/rapidsai\/raft"},{"key":"e_1_3_2_2_29_1","unstructured":"NVIDIA Research. 2022. cub::DeviceRadixSort. https:\/\/nvlabs.github.io\/cub\/structcub_1_1_device_radix_sort.html"},{"key":"e_1_3_2_2_30_1","unstructured":"Tobias Ribizel. 2020. gpu selection. https:\/\/github.com\/upsj\/gpu_selection"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2019.102588"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3183735"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.115"},{"key":"e_1_3_2_2_34_1","volume-title":"24th Pan-Hellenic Conference on Informatics","author":"Velentzas Polychronis","year":"2021","unstructured":"Polychronis Velentzas, Panagiotis Moutafis, and George Mavrommatis. 2021. An Improved GPU-Based Algorithmfor Processing the k Nearest Neighbor Query. In 24th Pan-Hellenic Conference on Informatics (Athens, Greece) (PCI 2020). Association for Computing Machinery, New York, NY, USA, 372--375."},{"key":"e_1_3_2_2_35_1","unstructured":"Wikipedia. 2022. Selection algorithm. https:\/\/en.wikipedia.org\/wiki\/Selection_algorithm"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3314578"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380126"},{"key":"e_1_3_2_2_38_1","unstructured":"Vasileios Zois Vassilis J Tsotras and Walid A Najjar. 2019. GPU accelerated top-k selection with efficient early stopping."}],"event":{"name":"SC '23: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver CO USA","acronym":"SC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607062","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581784.3607062","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:23Z","timestamp":1750178183000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607062"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,11]]},"references-count":38,"alternative-id":["10.1145\/3581784.3607062","10.1145\/3581784"],"URL":"https:\/\/doi.org\/10.1145\/3581784.3607062","relation":{},"subject":[],"published":{"date-parts":[[2023,11,11]]},"assertion":[{"value":"2023-11-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}