{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:19:27Z","timestamp":1774365567757,"version":"3.50.1"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62104128"],"award-info":[{"award-number":["62104128"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21B2031"],"award-info":[{"award-number":["U21B2031"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62325405"],"award-info":[{"award-number":["62325405"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013105","name":"Shanghai Rising-Star Program","doi-asserted-by":"publisher","award":["24QB2706200"],"award-info":[{"award-number":["24QB2706200"]}],"id":[{"id":"10.13039\/501100013105","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Douyin Information Service Company Ltd"},{"name":"Tsinghua EE Xilinx AI Research Fund, Beijing National Research Center for Information Science and Technology","award":["BNR2024TD03001"],"award-info":[{"award-number":["BNR2024TD03001"]}]},{"name":"InnoHK Funding, Hong Kong, SAR, (HKSAR) and Research Grants Council of HKSAR","award":["16213824"],"award-info":[{"award-number":["16213824"]}]},{"name":"ACCESS-AI Chip Center for Emerging Smart Systems and dgSPARSE Project"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tcad.2024.3518413","type":"journal-article","created":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T14:25:07Z","timestamp":1734359107000},"page":"2226-2239","source":"Crossref","is-referenced-by-count":2,"title":["Enabling Efficient Sparse Multiplications on GPUs With Heuristic Adaptability"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7000-6537","authenticated-orcid":false,"given":"Jiaming","family":"Xu","sequence":"first","affiliation":[{"name":"Qing Yuan Research Institute, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2012-8540","authenticated-orcid":false,"given":"Shan","family":"Huang","sequence":"additional","affiliation":[{"name":"Qing Yuan Research Institute, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4286-6359","authenticated-orcid":false,"given":"Jinhao","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, Beijing, China"}]},{"given":"Guyue","family":"Huang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2093-1788","authenticated-orcid":false,"given":"Yuan","family":"Xie","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6108-5157","authenticated-orcid":false,"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8464-0130","authenticated-orcid":false,"given":"Guohao","family":"Dai","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00076"},{"key":"ref2","first-page":"428","article-title":"Exploiting hardware utilization and adaptive dataflow for efficient sparse convolution in 3D point clouds","volume-title":"Proc. Mach. Learn. Syst.","author":"Hong"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00080"},{"key":"ref4","first-page":"1","article-title":"SPARSE: Sparse architecture search for CNNs on resource-constrained microcontrollers","volume-title":"Proc. 33rd Adv. Neural Inf. Process. Syst.","author":"Fedorov"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3694715.3695964"},{"key":"ref6","article-title":"SparseGPT: Massive language models can be accurately pruned in one-shot","author":"Frantar","year":"2023","journal-title":"arXiv:2301.00774"},{"key":"ref7","first-page":"669","article-title":"Tux2: Distributed graph computation for machine learning","volume-title":"Proc. 14th USENIX Symp. Netw. Syst. Design Implement. (NSDI)","author":"Xiao"},{"key":"ref8","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and Huffman coding","author":"Han","year":"2016","journal-title":"arXiv:1510.00149"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582047"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2017.8115709"},{"key":"ref11","volume-title":"Basic Linear Algebra for Sparse Matrices on Nvidia GPUs","year":"2024"},{"key":"ref12","volume-title":"Rocsparse documentation","year":"2024"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21105\/joss.01244"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3293883.3295712"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651378"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3208040.3208062"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3332466.3374546"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414654"},{"key":"ref20","article-title":"TC-GNN: Bridging sparse GNN computation and dense tensor cores on GPUs","author":"Wang","year":"2023","journal-title":"arXiv:2112.02052"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972740.43"},{"key":"ref22","volume-title":"Source code of GE-SpMM","year":"2024"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654078"},{"key":"ref24","first-page":"578","article-title":"TVM: An automated end-to-end optimizing compiler for deep learning","volume-title":"Proc. 13th USENIX Symp. Oper. Syst. Design Implement. (OSDI)","author":"Chen"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.68"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-96983-1_48"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00164"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00022"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3685277"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.57"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3428226"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45798-4_6"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1201\/b12985-26"},{"key":"ref34","volume-title":"Lightgbm","year":"2024"},{"key":"ref35","volume-title":"Cuda multi process service overview","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3466795"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/2851141.2851145"},{"key":"ref38","first-page":"1","article-title":"Fast graph representation learning with PyTorch geometric","volume-title":"Proc. ICLR Workshop Represent. Learn. Graphs Manifolds","author":"Fey"},{"key":"ref39","article-title":"Semi-supervised classification with graph convolutional networks","author":"Kipf","year":"2017","journal-title":"arXiv:1609.02907"}],"container-title":["IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/43\/11007940\/10802949.pdf?arnumber=10802949","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T18:43:16Z","timestamp":1763750596000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10802949\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":39,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tcad.2024.3518413","relation":{},"ISSN":["0278-0070","1937-4151"],"issn-type":[{"value":"0278-0070","type":"print"},{"value":"1937-4151","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}