{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:15:39Z","timestamp":1766268939204,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,22]]},"DOI":"10.1145\/3627703.3629560","type":"proceedings-article","created":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T06:28:28Z","timestamp":1713421708000},"page":"786-802","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Minuet: Accelerating 3D Sparse Convolutions on GPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9581-9088","authenticated-orcid":false,"given":"Jiacheng","family":"Yang","sequence":"first","affiliation":[{"name":"University of Toronto &amp; Vector Institute"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0162-4547","authenticated-orcid":false,"given":"Christina","family":"Giannoula","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2802-121X","authenticated-orcid":false,"given":"Jun","family":"Wu","sequence":"additional","affiliation":[{"name":"Amazon"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6172-4510","authenticated-orcid":false,"given":"Mostafa","family":"Elhoushi","sequence":"additional","affiliation":[{"name":"Meta"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1810-2752","authenticated-orcid":false,"given":"James","family":"Gleeson","sequence":"additional","affiliation":[{"name":"Samsung AI Centre Toronto"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3839-0919","authenticated-orcid":false,"given":"Gennady","family":"Pekhimenko","sequence":"additional","affiliation":[{"name":"CentML &amp; University of Toronto &amp; Vector Institute"}]}],"member":"320","published-online":{"date-parts":[[2024,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1618452.1618500"},{"key":"e_1_3_2_1_2_1","first-page":"39","volume-title":"Applications of GPU Computing Series","author":"Alcantara Dan A.","year":"2012","unstructured":"Dan A. Alcantara, Vasily Volkov, Shubhabrata Sengupta, Michael Mitzenmacher, John D. Owens, and Nina Amenta. Chapter 4 - building an efficient hash table on the gpu. In Wen mei W. Hwu, editor, GPU Computing Gems Jade Edition, Applications of GPU Computing Series, pages 39--53. Morgan Kaufmann, Boston, 2012."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.170"},{"key":"e_1_3_2_1_4_1","volume-title":"A dataset for semantic segmentation of point cloud sequences. CoRR, abs\/1904.01416","author":"Behley Jens","year":"2019","unstructured":"Jens Behley, Martin Garbade, Andres Milioto, Jan Quenzel, Sven Behnke, Cyrill Stachniss, and Juergen Gall. A dataset for semantic segmentation of point cloud sequences. CoRR, abs\/1904.01416, 2019."},{"key":"e_1_3_2_1_5_1","unstructured":"Angel X. Chang Thomas Funkhouser Leonidas Guibas Pat Hanrahan Qixing Huang Zimo Li Silvio Savarese Manolis Savva Shuran Song Hao Su Jianxiong Xiao Li Yi and Fisher Yu. ShapeNet: An Information-Rich 3D Model Repository. Technical Report arXiv:1512.03012 [cs.GR] Stanford University --- Princeton University --- Toyota Technological Institute at Chicago 2015."},{"key":"e_1_3_2_1_6_1","volume-title":"TVM: end-to-end optimization stack for deep learning. CoRR, abs\/1802.04799","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Haichen Shen, Eddie Q. Yan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. TVM: end-to-end optimization stack for deep learning. CoRR, abs\/1802.04799, 2018."},{"key":"e_1_3_2_1_7_1","first-page":"13488","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Chen Yukang","year":"2023","unstructured":"Yukang Chen, Jianhui Liu, Xiangyu Zhang, Xiaojuan Qi, and Jiaya Jia. Largekernel3d: Scaling up kernels in 3d sparse cnns. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pages 13488--13498, June 2023."},{"key":"e_1_3_2_1_8_1","volume-title":"4d spatio-temporal convnets: Minkowski convolutional neural networks. CoRR, abs\/1904.08755","author":"Choy Christopher B.","year":"2019","unstructured":"Christopher B. Choy, JunYoung Gwak, and Silvio Savarese. 4d spatio-temporal convnets: Minkowski convolutional neural networks. CoRR, abs\/1904.08755, 2019."},{"key":"e_1_3_2_1_9_1","volume-title":"Spconv: Spatially sparse convolution library. https:\/\/github.com\/traveller59\/spconv","author":"Contributors Spconv","year":"2022","unstructured":"Spconv Contributors. Spconv: Spatially sparse convolution library. https:\/\/github.com\/traveller59\/spconv, 2022."},{"key":"e_1_3_2_1_10_1","unstructured":"NVIDIA Corporation. CUB: Main Page --- nvlabs.github.io. https:\/\/nvlabs.github.io\/cub\/index.html. [Accessed 09-May-2023]."},{"key":"e_1_3_2_1_11_1","unstructured":"NVIDIA Developer. NVIDIA Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute. [Accessed 01-Nov-2023]."},{"key":"e_1_3_2_1_12_1","volume-title":"Apr","author":"Developer NVIDIA","year":"2021","unstructured":"NVIDIA Developer. cuBLAS --- developer.nvidia.com. https:\/\/developer.nvidia.com\/cublas, Apr 2021. [Accessed 09-May-2023]."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575702"},{"key":"e_1_3_2_1_14_1","volume-title":"ASH: A modern framework for parallel spatial hashing in 3d perception. CoRR, abs\/2110.00511","author":"Dong Wei","year":"2021","unstructured":"Wei Dong, Yixing Lao, Michael Kaess, and Vladlen Koltun. ASH: A modern framework for parallel spatial hashing in 3d perception. CoRR, abs\/2110.00511, 2021."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3576933"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs5126382"},{"key":"e_1_3_2_1_17_1","volume-title":"Onur Mutlu. SparseP: Towards Efficient Sparse Matrix Vector Multiplication on Real Processing-In-Memory Architectures. In Proc. ACM Meas. Anal. Comput. Syst.","author":"Giannoula Christina","year":"2022","unstructured":"Christina Giannoula, Ivan Fernandez, Juan G\u00f3mez-Luna, Nectarios Koziris, Georgios Goumas, and Onur Mutlu. SparseP: Towards Efficient Sparse Matrix Vector Multiplication on Real Processing-In-Memory Architectures. In Proc. ACM Meas. Anal. Comput. Syst., 2022."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00961"},{"key":"e_1_3_2_1_19_1","volume-title":"Submanifold sparse convolutional networks. arXiv preprint arXiv:1706.01307","author":"Graham Benjamin","year":"2017","unstructured":"Benjamin Graham and Laurens van der Maaten. Submanifold sparse convolutional networks. arXiv preprint arXiv:1706.01307, 2017."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304621"},{"key":"e_1_3_2_1_21_1","volume-title":"Deep learning for 3d point clouds: A survey. CoRR, abs\/1912.12033","author":"Guo Yulan","year":"2019","unstructured":"Yulan Guo, Hanyun Wang, Qingyong Hu, Hao Liu, Li Liu, and Mohammed Bennamoun. Deep learning for 3d point clouds: A survey. CoRR, abs\/1912.12033, 2019."},{"key":"e_1_3_2_1_22_1","first-page":"91","volume-title":"Remote Sensing and Spatial Information Sciences","volume":"1","author":"Hackel Timo","year":"2017","unstructured":"Timo Hackel, N. Savinov, L. Ladicky, Jan D. Wegner, K. Schindler, and M. Pollefeys. SEMANTIC3D.NET: A new large-scale point cloud classification benchmark. In ISPRS Annals of the Photogrammetry, Remote Sensing and Spatial Information Sciences, volume IV-1-W1, pages 91--98, 2017."},{"key":"e_1_3_2_1_23_1","volume-title":"Occuseg: Occupancy-aware 3d instance segmentation. CoRR, abs\/2003.06537","author":"Han Lei","year":"2020","unstructured":"Lei Han, Tian Zheng, Lan Xu, and Lu Fang. Occuseg: Occupancy-aware 3d instance segmentation. CoRR, abs\/2003.06537, 2020."},{"key":"e_1_3_2_1_24_1","first-page":"5","article-title":"Exploiting hardware utilization and adaptive dataflow for efficient sparse convolution in 3d point clouds","author":"Hong Ke","year":"2023","unstructured":"Ke Hong, Zhongming Yu, Guohao Dai, Xinhao Yang, Yaoxiu Lian, Ningyi Xu, and Yu Wang. Exploiting hardware utilization and adaptive dataflow for efficient sparse convolution in 3d point clouds. Proceedings of Machine Learning and Systems, 5, 2023.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00017"},{"key":"e_1_3_2_1_26_1","volume-title":"Point-wise convolutional neural network. CoRR, abs\/1712.05245","author":"Hua Binh-Son","year":"2017","unstructured":"Binh-Son Hua, Minh-Khoi Tran, and Sai-Kit Yeung. Point-wise convolutional neural network. CoRR, abs\/1712.05245, 2017."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2018.01.009"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"key":"e_1_3_2_1_29_1","volume-title":"Automatic horizontal fusion for GPU kernels. CoRR, abs\/2007.01277","author":"Li Ao","year":"2020","unstructured":"Ao Li, Bojian Zheng, Gennady Pekhimenko, and Fan Long. Automatic horizontal fusion for GPU kernels. CoRR, abs\/2007.01277, 2020."},{"key":"e_1_3_2_1_30_1","volume-title":"Pointacc: Efficient point cloud accelerator. CoRR, abs\/2110.07600","author":"Lin Yujun","year":"2021","unstructured":"Yujun Lin, Zhekai Zhang, Haotian Tang, Hanrui Wang, and Song Han. Pointacc: Efficient point cloud accelerator. CoRR, abs\/2110.07600, 2021."},{"key":"e_1_3_2_1_31_1","volume-title":"Point-voxel CNN for efficient 3d deep learning. CoRR, abs\/1907.03739","author":"Liu Zhijian","year":"2019","unstructured":"Zhijian Liu, Haotian Tang, Yujun Lin, and Song Han. Point-voxel CNN for efficient 3d deep learning. CoRR, abs\/1907.03739, 2019."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00122"},{"key":"e_1_3_2_1_33_1","volume-title":"Voxel transformer for 3d object detection. CoRR, abs\/2109.02497","author":"Mao Jiageng","year":"2021","unstructured":"Jiageng Mao, Yujing Xue, Minzhe Niu, Haoyue Bai, Jiashi Feng, Xiaodan Liang, Hang Xu, and Chunjing Xu. Voxel transformer for 3d object detection. CoRR, abs\/2109.02497, 2021."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353481"},{"key":"e_1_3_2_1_35_1","volume-title":"Mix3d: Out-of-context data augmentation for 3d scenes. CoRR, abs\/2110.02210","author":"Nekrasov Alexey","year":"2021","unstructured":"Alexey Nekrasov, Jonas Schult, Or Litany, Bastian Leibe, and Francis Engelmann. Mix3d: Out-of-context data augmentation for 3d scenes. CoRR, abs\/2110.02210, 2021."},{"key":"e_1_3_2_1_36_1","unstructured":"NVIDIA. CUDA Runtime API: CUDA Toolkit Documentation --- docs.nvidia.com. https:\/\/docs.nvidia.com\/cuda\/cuda-runtime-api\/group___CUDART___STREAM.html. [Accessed 18-May-2023]."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2012.202"},{"key":"e_1_3_2_1_38_1","volume-title":"Li Erran Li, and Gao Huang. 3d object detection with pointformer. CoRR, abs\/2012.11409","author":"Pan Xuran","year":"2020","unstructured":"Xuran Pan, Zhuofan Xia, Shiji Song, Li Erran Li, and Gao Huang. 3d object detection with pointformer. CoRR, abs\/2012.11409, 2020."},{"key":"e_1_3_2_1_39_1","volume-title":"Pointnet: Deep learning on point sets for 3d classification and segmentation. CoRR, abs\/1612.00593","author":"Qi Charles Ruizhongtai","year":"2016","unstructured":"Charles Ruizhongtai Qi, Hao Su, Kaichun Mo, and Leonidas J. Guibas. Pointnet: Deep learning on point sets for 3d classification and segmentation. CoRR, abs\/1612.00593, 2016."},{"key":"e_1_3_2_1_40_1","volume-title":"Pointnet++: Deep hierarchical feature learning on point sets in a metric space. CoRR, abs\/1706.02413","author":"Qi Charles Ruizhongtai","year":"2017","unstructured":"Charles Ruizhongtai Qi, Li Yi, Hao Su, and Leonidas J. Guibas. Pointnet++: Deep hierarchical feature learning on point sets in a metric space. CoRR, abs\/1706.02413, 2017."},{"key":"e_1_3_2_1_41_1","volume-title":"Models for Common-sense Knowledge. CVPR 2015 Workshop on Functionality, Physics, Intentionality and Causality","author":"Savva Manolis","year":"2015","unstructured":"Manolis Savva, Angel X. Chang, and Pat Hanrahan. Semantically-Enriched 3D Models for Common-sense Knowledge. CVPR 2015 Workshop on Functionality, Physics, Intentionality and Causality, 2015."},{"key":"e_1_3_2_1_42_1","volume-title":"3d-fct: Simultaneous 3d object detection and tracking using feature correlation. CoRR, abs\/2110.02531","author":"Sharma Naman","year":"2021","unstructured":"Naman Sharma and Hocksoon Lim. 3d-fct: Simultaneous 3d object detection and tracking using feature correlation. CoRR, abs\/2110.02531, 2021."},{"key":"e_1_3_2_1_43_1","first-page":"302","volume-title":"Torchsparse: Efficient point cloud inference engine","author":"Tang Haotian","year":"2022","unstructured":"Haotian Tang, Zhijian Liu, Xiuyu Li, Yujun Lin, and Song Han. Torchsparse: Efficient point cloud inference engine. In D. Marculescu, Y. Chi, and C. Wu, editors, Proceedings of Machine Learning and Systems, volume 4, pages 302--315, 2022."},{"key":"e_1_3_2_1_44_1","volume-title":"Searching efficient 3d architectures with sparse point-voxel convolution. CoRR, abs\/2007.16100","author":"Tang Haotian","year":"2020","unstructured":"Haotian Tang, Zhijian Liu, Shengyu Zhao, Yujun Lin, Ji Lin, Hanrui Wang, and Song Han. Searching efficient 3d architectures with sparse point-voxel convolution. CoRR, abs\/2007.16100, 2020."},{"key":"e_1_3_2_1_45_1","first-page":"202","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops","author":"Tang Haotian","year":"2023","unstructured":"Haotian Tang, Shang Yang, Zhijian Liu, Ke Hong, Zhongming Yu, Xiuyu Li, Guohao Dai, Yu Wang, and Song Han. Torchsparse++: Efficient point cloud engine. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pages 202--209, June 2023."},{"key":"e_1_3_2_1_46_1","first-page":"344","volume-title":"Proceedings of the 2010 IEEE\/ACM Int'l Conference on Green Computing and Communications & Int'l Conference on Cyber, Physical and Social Computing, GREENCOM-CPSCOM '10","author":"Wang Guibin","year":"2010","unstructured":"Guibin Wang, YiSong Lin, and Wei Yi. Kernel fusion: An effective method for better power efficiency on multithreaded gpu. In Proceedings of the 2010 IEEE\/ACM Int'l Conference on Green Computing and Communications & Int'l Conference on Cyber, Physical and Social Computing, GREENCOM-CPSCOM '10, page 344--350, USA, 2010. IEEE Computer Society."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2019.8814115"},{"key":"e_1_3_2_1_48_1","first-page":"5","article-title":"Unified convolution framework: A compiler-based approach to support sparse convolutions","author":"Won Jaeyeon","year":"2023","unstructured":"Jaeyeon Won, Changwan Hong, Charith Mendis, Joel Emer, and Saman Amarasinghe. Unified convolution framework: A compiler-based approach to support sparse convolutions. Proceedings of Machine Learning and Systems, 5, 2023.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582047"},{"key":"e_1_3_2_1_50_1","first-page":"11784","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Yin Tianwei","year":"2021","unstructured":"Tianwei Yin, Xingyi Zhou, and Philipp Krahenbuhl. Center-based 3d object detection and tracking. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pages 11784--11793, June 2021."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989591"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS48674.2020.9213894"}],"event":{"name":"EuroSys '24: Nineteenth European Conference on Computer Systems","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Athens Greece","acronym":"EuroSys '24"},"container-title":["Proceedings of the Nineteenth European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629560","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627703.3629560","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:15:32Z","timestamp":1755825332000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627703.3629560"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":52,"alternative-id":["10.1145\/3627703.3629560","10.1145\/3627703"],"URL":"https:\/\/doi.org\/10.1145\/3627703.3629560","relation":{},"subject":[],"published":{"date-parts":[[2024,4,22]]},"assertion":[{"value":"2024-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}