{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T14:03:06Z","timestamp":1768312986688,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T00:00:00Z","timestamp":1691366400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF-2126672, CCF-2144523 (CAREER), OAC-2209957, and TI-2229304"],"award-info":[{"award-number":["CCF-2126672, CCF-2144523 (CAREER), OAC-2209957, and TI-2229304"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,7]]},"DOI":"10.1145\/3605573.3605625","type":"proceedings-article","created":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T16:21:16Z","timestamp":1694622076000},"page":"51-61","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["SNICIT: Accelerating Sparse Neural Network Inference via Compression at Inference Time on GPU"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0718-1003","authenticated-orcid":false,"given":"Shui","family":"Jiang","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9768-3378","authenticated-orcid":false,"given":"Tsung-Wei","family":"Huang","sequence":"additional","affiliation":[{"name":"The University of Wisconsin at Madison, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6406-4810","authenticated-orcid":false,"given":"Bei","family":"Yu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7348-5625","authenticated-orcid":false,"given":"Tsung-Yi","family":"Ho","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong"}]}],"member":"320","published-online":{"date-parts":[[2023,9,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Deep learning using rectified linear units (relu). arXiv preprint arXiv:1803.08375","author":"Agarap Abien\u00a0Fred","year":"2018","unstructured":"Abien\u00a0Fred Agarap. 2018. Deep learning using rectified linear units (relu). arXiv preprint arXiv:1803.08375 (2018)."},{"key":"e_1_3_2_2_2_1","unstructured":"Vinod\u00a0Nair Alex\u00a0Krizhevsky and Geoffrey Hinton. [n. d.]. The CIFAR-10 dataset. https:\/\/www.cs.toronto.edu\/\u00a0kriz\/cifar.html"},{"key":"e_1_3_2_2_3_1","volume-title":"Accelerating K-Means clustering with parallel implementations and GPU computing","author":"Bhimani Janki","unstructured":"Janki Bhimani, Miriam Leeser, and Ningfang Mi. 2015. Accelerating K-Means clustering with parallel implementations and GPU computing. In IEEE HPEC. 1\u20136."},{"key":"e_1_3_2_2_4_1","volume-title":"A GPU Implementation of the Sparse Deep Neural Network Graph Challenge","author":"Bisson Mauro","unstructured":"Mauro Bisson and Massimiliano Fatica. 2019. A GPU Implementation of the Sparse Deep Neural Network Graph Challenge. In IEEE HPEC. 1\u20138."},{"key":"e_1_3_2_2_5_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, 2020. Language models are few-shot learners. NeurIPS 33 (2020), 1877\u20131901.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_6_1","volume-title":"Euro-Par","author":"Choi Kyusik","unstructured":"Kyusik Choi and Hoeseok Yang. 2021. A GPU Architecture Aware Fine-Grain Pruning Technique for Deep Neural Networks. In Euro-Par. Springer, 217\u2013231."},{"key":"e_1_3_2_2_7_1","volume-title":"Article 25 (oct","author":"Dalton Steven","year":"2015","unstructured":"Steven Dalton, Luke Olson, and Nathan Bell. 2015. Optimizing Sparse Matrix\u2014Matrix Multiplication for the GPU. ACM TOMS 41, 4, Article 25 (oct 2015)."},{"key":"e_1_3_2_2_8_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_9_1","unstructured":"Utku Evci Trevor Gale Jacob Menick 2020. Rigging the lottery: Making all tickets winners. In ICML. PMLR 2943\u20132952."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1975.1055330"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Trevor Gale Matei Zaharia Cliff Young 2020. Sparse GPU Kernels for Deep Learning. In SC20. 1\u201314.","DOI":"10.1109\/SC41405.2020.00021"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","unstructured":"Georgios Georgiadis. 2019. Accelerating convolutional neural networks via activation map compression. In CVPR. 7085\u20137095.","DOI":"10.1109\/CVPR.2019.00725"},{"key":"e_1_3_2_2_13_1","volume-title":"SC20","author":"Guo Cong","unstructured":"Cong Guo, Bo\u00a0Yang Hsueh, Jingwen Leng, 2020. Accelerating sparse dnn models without hardware-support via tile-wise sparsity. In SC20. IEEE, 1\u201315."},{"key":"e_1_3_2_2_14_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William\u00a0J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_2_15_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren 2016. Deep residual learning for image recognition. In CVPR. 770\u2013778."},{"key":"e_1_3_2_2_16_1","volume-title":"Accelerating Sparse Deep Neural Networks on FPGAs","author":"Huang Sitao","unstructured":"Sitao Huang, Carl Pearson, Rakesh Nagi, 2019. Accelerating Sparse Deep Neural Networks on FPGAs. In IEEE HPEC. 1\u20137."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00105"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3104255"},{"key":"e_1_3_2_2_19_1","unstructured":"hyeon95y. 2020. SparseLinear. https:\/\/github.com\/hyeon95y\/SparseLinear"},{"key":"e_1_3_2_2_20_1","volume-title":"Sparse Deep Neural Network Acceleration on HBM-Enabled FPGA Platform","author":"Jain Abhishek\u00a0Kumar","unstructured":"Abhishek\u00a0Kumar Jain, Sharan Kumar, Aashish Tripathi, 2021. Sparse Deep Neural Network Acceleration on HBM-Enabled FPGA Platform. In IEEE HPEC. 1\u20137."},{"key":"e_1_3_2_2_21_1","first-page":"20744","article-title":"Top-kast: Top-k always sparse training","volume":"33","author":"Jayakumar Siddhant","year":"2020","unstructured":"Siddhant Jayakumar, Razvan Pascanu, Jack Rae, 2020. Top-kast: Top-k always sparse training. NeurIPS 33 (2020), 20744\u201320754.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_22_1","volume-title":"Sparse Deep Neural Network Graph Challenge","author":"Kepner Jeremy","unstructured":"Jeremy Kepner, Simon Alford, Vijay Gadepally, 2019. Sparse Deep Neural Network Graph Challenge. In IEEE HPEC. 1\u20137."},{"key":"e_1_3_2_2_23_1","volume-title":"Radix-net: Structured sparse matrices for deep neural networks","author":"Kepner Jeremy","year":"2019","unstructured":"Jeremy Kepner and Ryan Robinett. 2019. Radix-net: Structured sparse matrices for deep neural networks. In IEEE IPDPSW. 268\u2013274."},{"key":"e_1_3_2_2_24_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P.","year":"2014","unstructured":"D.\u00a0P. Kingma and J. Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_25_1","unstructured":"Adarsh Kumar Arjun Balasubramanian Shivaram Venkataraman 2019. Accelerating deep learning inference via freezing. In USENIX HotCloud."},{"key":"e_1_3_2_2_26_1","unstructured":"Mark Kurtz Justin Kopinsky Rati Gelashvili 2020. Inducing and exploiting activation sparsity for fast inference on deep neural networks. In ICML. PMLR 5533\u20135543."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00501-8"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"crossref","unstructured":"Yun Li Chen Zhang Shihao Han 2021. Boosting Mobile CNN Inference through Semantic Memory. In ACM MM. 2362\u20132371.","DOI":"10.1145\/3474085.3475399"},{"key":"e_1_3_2_2_29_1","first-page":"11","article-title":"Accelerating Large Sparse Neural Network Inference Using GPU Task Graph Parallelism","volume":"33","author":"Lin D.","year":"2022","unstructured":"D. Lin and T. Huang. 2022. Accelerating Large Sparse Neural Network Inference Using GPU Task Graph Parallelism. IEEE TPDS 33, 11 (nov 2022), 3041\u20133052.","journal-title":"IEEE TPDS"},{"key":"e_1_3_2_2_30_1","volume-title":"A Novel Inference Algorithm for Large Sparse Neural Network using Task Graph Parallelism","author":"Lin Dian-Lun","unstructured":"Dian-Lun Lin and Tsung-Wei Huang. 2020. A Novel Inference Algorithm for Large Sparse Neural Network using Task Graph Parallelism. In IEEE HPEC. 1\u20137."},{"key":"e_1_3_2_2_31_1","volume-title":"5th Berkeley Symp. Math. Statist. Probability. 281\u2013297","author":"MacQueen J","year":"1967","unstructured":"J MacQueen. 1967. Some methods for classification and analysis of multivariate observations. In 5th Berkeley Symp. Math. Statist. Probability. 281\u2013297."},{"key":"e_1_3_2_2_32_1","volume-title":"Accelerating sparse deep neural networks. arXiv preprint arXiv:2104.08378","author":"Mishra Asit","year":"2021","unstructured":"Asit Mishra, Jorge\u00a0Albericio Latorre, Jeff Pool, 2021. Accelerating sparse deep neural networks. arXiv preprint arXiv:2104.08378 (2021)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477008"},{"key":"e_1_3_2_2_34_1","volume-title":"Accelerating Sparse Deep Neural Network Inference Using GPU Tensor Cores","author":"Sun Yufei","unstructured":"Yufei Sun, Long Zheng, Qinggang Wang, 2022. Accelerating Sparse Deep Neural Network Inference Using GPU Tensor Cores. In IEEE HPEC. 1\u20137."},{"key":"e_1_3_2_2_35_1","volume-title":"Visualizing data using t-SNE.JMLR 9, 11","author":"Maaten Laurens Van\u00a0der","year":"2008","unstructured":"Laurens Van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE.JMLR 9, 11 (2008)."},{"key":"e_1_3_2_2_36_1","volume-title":"Large Scale K-Median Clustering for Stable Clustering Instances","author":"Voevodski Konstantin","unstructured":"Konstantin Voevodski. 2021. Large Scale K-Median Clustering for Stable Clustering Instances. In AISTATS, Arindam Banerjee and Kenji Fukumizu (Eds.). Vol.\u00a0130. PMLR, 2890\u20132898."},{"key":"e_1_3_2_2_37_1","unstructured":"Mitchell Wortsman Gabriel Ilharco Samir\u00a0Ya Gadre 2022. Model soups: averaging weights of multiple fine-tuned models improves accuracy without increasing inference time. In ICML. PMLR 23965\u201323998."},{"key":"e_1_3_2_2_38_1","volume-title":"Fast Sparse Deep Neural Network Inference with Flexible SpMM Optimization Space Exploration","author":"Xin Jie","unstructured":"Jie Xin, Xianqi Ye, Long Zheng, 2021. Fast Sparse Deep Neural Network Inference with Flexible SpMM Optimization Space Exploration. In IEEE HPEC. 1\u20137."},{"key":"e_1_3_2_2_39_1","volume-title":"Towards Fast GPU-based Sparse DNN Inference: A Hybrid Compute Model","author":"Xu Shaoxian","unstructured":"Shaoxian Xu, Minkang Wu, Long Zheng, 2022. Towards Fast GPU-based Sparse DNN Inference: A Hybrid Compute Model. In IEEE HPEC. 1\u20137."},{"key":"e_1_3_2_2_40_1","volume-title":"Dasnet: Dynamic activation sparsity for neural network efficiency improvement","author":"Yang Qing","year":"2019","unstructured":"Qing Yang, Jiachen Mao, Zuoguan Wang, 2019. Dasnet: Dynamic activation sparsity for neural network efficiency improvement. In IEEE ICTAI. 1401\u20131405."},{"key":"e_1_3_2_2_41_1","unstructured":"Corinna\u00a0Cortes Yann\u00a0LeCun and Christopher\u00a0J.C. Burges. [n. d.]. The MNIST database of handwritten digits. http:\/\/yann.lecun.com\/exdb\/mnist\/"},{"key":"e_1_3_2_2_42_1","volume-title":"GPU-accelerated Faster Mean Shift with euclidean distance metrics","author":"You Le","unstructured":"Le You, Han Jiang, Jinyong Hu, 2022. GPU-accelerated Faster Mean Shift with euclidean distance metrics. In IEEE COMPSAC. 211\u2013216."},{"key":"e_1_3_2_2_43_1","unstructured":"Aojun Zhou Yukun Ma Junnan Zhu 2021. Learning n: m fine-grained structured sparse neural networks from scratch. In ICLR."}],"event":{"name":"ICPP 2023: 52nd International Conference on Parallel Processing","location":"Salt Lake City UT USA","acronym":"ICPP 2023"},"container-title":["Proceedings of the 52nd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605573.3605625","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605573.3605625","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605573.3605625","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:04Z","timestamp":1750182544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605573.3605625"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,7]]},"references-count":43,"alternative-id":["10.1145\/3605573.3605625","10.1145\/3605573"],"URL":"https:\/\/doi.org\/10.1145\/3605573.3605625","relation":{},"subject":[],"published":{"date-parts":[[2023,8,7]]},"assertion":[{"value":"2023-09-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}