{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T18:14:17Z","timestamp":1758478457424,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T00:00:00Z","timestamp":1622678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["Discovery Project DP210101984"],"award-info":[{"award-number":["Discovery Project DP210101984"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006990","name":"Thomas F. and Kate Miller Jeffress Memorial Trust","doi-asserted-by":"publisher","award":["Jeffress Trust Awards in Interdisciplinary Research"],"award-info":[{"award-number":["Jeffress Trust Awards in Interdisciplinary Research"]}],"id":[{"id":"10.13039\/100006990","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["OAC-2034169, OAC-2042084, CCF-1937500, CNS-1909172, IIS-1850546, IIS-2008973, CNS-1951974"],"award-info":[{"award-number":["OAC-2034169, OAC-2042084, CCF-1937500, CNS-1909172, IIS-1850546, IIS-2008973, CNS-1951974"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,6,3]]},"DOI":"10.1145\/3447818.3459988","type":"proceedings-article","created":{"date-parts":[[2021,6,4]],"date-time":"2021-06-04T15:09:36Z","timestamp":1622819376000},"page":"266-278","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["ClickTrain"],"prefix":"10.1145","author":[{"given":"Chengming","family":"Zhang","sequence":"first","affiliation":[{"name":"Washington State University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Geng","family":"Yuan","sequence":"additional","affiliation":[{"name":"Northeastern University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Niu","sequence":"additional","affiliation":[{"name":"William &amp; Mary"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiannan","family":"Tian","sequence":"additional","affiliation":[{"name":"Washington State University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sian","family":"Jin","sequence":"additional","affiliation":[{"name":"Washington State University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Donglin","family":"Zhuang","sequence":"additional","affiliation":[{"name":"The University of Sydney"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhe","family":"Jiang","sequence":"additional","affiliation":[{"name":"The University of Alabama"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanzhi","family":"Wang","sequence":"additional","affiliation":[{"name":"Northeastern University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Ren","sequence":"additional","affiliation":[{"name":"William &amp; Mary"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuaiwen Leon","family":"Song","sequence":"additional","affiliation":[{"name":"University of Sydney"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dingwen","family":"Tao","sequence":"additional","affiliation":[{"name":"Washington State University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,6,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"NVIDIA QUADRO RTX 5000. 2020. https:\/\/www.nvidia.com\/en-us\/design-visualization\/quadro\/rtx-5000\/. Online.  NVIDIA QUADRO RTX 5000. 2020. https:\/\/www.nvidia.com\/en-us\/design-visualization\/quadro\/rtx-5000\/. Online."},{"key":"e_1_3_2_1_2_1","volume-title":"Benoit Steiner, Paul A. Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng.","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi , Paul Barham , Jianmin Chen , Zhifeng Chen , Andy Davis , Jeffrey Dean , Matthieu Devin , Sanjay Ghemawat , Geoffrey Irving , Michael Isard , Manjunath Kudlur , Josh Levenberg , Rajat Monga , Sherry Moore , Derek Gordon Murray , Benoit Steiner, Paul A. Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016 . TensorFlow: A System for Large-Scale Machine Learning. In 12th {USENIX} Symposium on Operating Systems Design and Implementation ( {OSDI} 16). 265--283. Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek Gordon Murray, Benoit Steiner, Paul A. Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: A System for Large-Scale Machine Learning. In 12th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 16). 265--283."},{"key":"e_1_3_2_1_3_1","volume-title":"Language models are few-shot learners. arXiv preprint arXiv:2005.14165","author":"Brown Tom B.","year":"2020","unstructured":"Tom B. Brown , Benjamin Mann , Nick Ryder , Melanie Subbiah , Jared Kaplan , Prafulla Dhariwal , Arvind Neelakantan , Pranav Shyam , Girish Sastry , Amanda Askell , Sandhini Agarwal , Ariel Herbert-Voss , Gretchen Krueger , Tom Henighan , Rewon Child , Aditya Ramesh , Daniel M. Ziegler , Jeffrey Wu , Clemens Winter , Christopher Hesse , Mark Chen , Eric Sigler , Mateusz Litwin , Scott Gray , Benjamin Chess , Jack Clark , Christopher Berner , Sam McCandlish , Alec Radford , Ilya Sutskever , and Dario Amodei . 2020. Language models are few-shot learners. arXiv preprint arXiv:2005.14165 ( 2020 ). Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel M. Ziegler, Jeffrey Wu, Clemens Winter, Christopher Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language models are few-shot learners. arXiv preprint arXiv:2005.14165 (2020)."},{"key":"e_1_3_2_1_4_1","volume-title":"YOLObile: Real-Time Object Detection on Mobile Devices via Compression-Compilation Co-Design. arXiv preprint arXiv:2009.05697","author":"Cai Yuxuan","year":"2020","unstructured":"Yuxuan Cai , Hongjia Li , Geng Yuan , Wei Niu , Yanyu Li , Xulong Tang , Bin Ren , and Yanzhi Wang . 2020. YOLObile: Real-Time Object Detection on Mobile Devices via Compression-Compilation Co-Design. arXiv preprint arXiv:2009.05697 ( 2020 ). Yuxuan Cai, Hongjia Li, Geng Yuan, Wei Niu, Yanyu Li, Xulong Tang, Bin Ren, and Yanzhi Wang. 2020. YOLObile: Real-Time Object Detection on Mobile Devices via Compression-Compilation Co-Design. arXiv preprint arXiv:2009.05697 (2020)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00319"},{"key":"e_1_3_2_1_6_1","unstructured":"CIFAR-10 and CIFAR-100. 2020. https:\/\/www.cs.toronto.edu\/ kriz\/cifar.html. Online.  CIFAR-10 and CIFAR-100. 2020. https:\/\/www.cs.toronto.edu\/ kriz\/cifar.html. Online."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390177"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2019.2914438"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124552"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3194554.3194625"},{"key":"e_1_3_2_1_11_1","volume-title":"RTMobile: Beyond Real-Time Mobile Acceleration of RNNs for Speech Recognition. arXiv preprint arXiv:2002.11474","author":"Dong Peiyan","year":"2020","unstructured":"Peiyan Dong , Siyue Wang , Wei Niu , Chengming Zhang , Sheng Lin , Zhengang Li , Yifan Gong , Bin Ren , Xue Lin , Yanzhi Wang , and Dingwen Tao . 2020. RTMobile: Beyond Real-Time Mobile Acceleration of RNNs for Speech Recognition. arXiv preprint arXiv:2002.11474 ( 2020 ). Peiyan Dong, Siyue Wang, Wei Niu, Chengming Zhang, Sheng Lin, Zhengang Li, Yifan Gong, Bin Ren, Xue Lin, Yanzhi Wang, and Dingwen Tao. 2020. RTMobile: Beyond Real-Time Mobile Acceleration of RNNs for Speech Recognition. arXiv preprint arXiv:2002.11474 (2020)."},{"key":"e_1_3_2_1_12_1","unstructured":"Xuanyi Dong and Yi Yang. 2019. Network pruning via transformable architecture search. In Advances in Neural Information Processing Systems. 759--770.  Xuanyi Dong and Yi Yang. 2019. Network pruning via transformable architecture search. In Advances in Neural Information Processing Systems . 759--770."},{"key":"e_1_3_2_1_13_1","volume-title":"Jan Hendrik Metzen, and Frank Hutter","author":"Elsken Thomas","year":"2018","unstructured":"Thomas Elsken , Jan Hendrik Metzen, and Frank Hutter . 2018 . Neural architecture search: A survey. arXiv preprint arXiv:1808.05377 (2018). Thomas Elsken, Jan Hendrik Metzen, and Frank Hutter. 2018. Neural architecture search: A survey. arXiv preprint arXiv:1808.05377 (2018)."},{"key":"e_1_3_2_1_14_1","volume-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks. arXiv preprint arXiv:1803.03635","author":"Frankle Jonathan","year":"2018","unstructured":"Jonathan Frankle and Michael Carbin . 2018. The lottery ticket hypothesis: Finding sparse, trainable neural networks. arXiv preprint arXiv:1803.03635 ( 2018 ). Jonathan Frankle and Michael Carbin. 2018. The lottery ticket hypothesis: Finding sparse, trainable neural networks. arXiv preprint arXiv:1803.03635 (2018)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.93808"},{"key":"e_1_3_2_1_16_1","volume-title":"Sparse GPU Kernels for Deep Learning. arXiv preprint arXiv:2006.10901","author":"Gale Trevor","year":"2020","unstructured":"Trevor Gale , Matei Zaharia , Cliff Young , and Erich Elsen . 2020. Sparse GPU Kernels for Deep Learning. arXiv preprint arXiv:2006.10901 ( 2020 ). Trevor Gale, Matei Zaharia, Cliff Young, and Erich Elsen. 2020. Sparse GPU Kernels for Deep Learning. arXiv preprint arXiv:2006.10901 (2020)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2019.00-43"},{"key":"e_1_3_2_1_18_1","volume-title":"Hot Chips Symposium. 1--6.","author":"Han Song","year":"2016","unstructured":"Song Han , Xingyu Liu , Huizi Mao , Jing Pu , Ardavan Pedram , Mark Horowitz , and Bill Dally . 2016 . Deep compression and EIE: Efficient inference engine on compressed deep neural network .. In Hot Chips Symposium. 1--6. Song Han, Xingyu Liu, Huizi Mao, Jing Pu, Ardavan Pedram, Mark Horowitz, and Bill Dally. 2016. Deep compression and EIE: Efficient inference engine on compressed deep neural network.. In Hot Chips Symposium. 1--6."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/309"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.155"},{"key":"e_1_3_2_1_22_1","volume-title":"GE-SpMM: General-purpose Sparse Matrix-Matrix Multiplication on GPUs for Graph Neural Networks. arXiv preprint arXiv:2007.03179","author":"Huang Guyue","year":"2020","unstructured":"Guyue Huang , Guohao Dai , Yu Wang , and Huazhong Yang . 2020. GE-SpMM: General-purpose Sparse Matrix-Matrix Multiplication on GPUs for Graph Neural Networks. arXiv preprint arXiv:2007.03179 ( 2020 ). Guyue Huang, Guohao Dai, Yu Wang, and Huazhong Yang. 2020. GE-SpMM: General-purpose Sparse Matrix-Matrix Multiplication on GPUs for Graph Neural Networks. arXiv preprint arXiv:2007.03179 (2020)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3326608"},{"key":"e_1_3_2_1_24_1","unstructured":"Alex Krizhevsky Ilya Sutskever and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems. 1097--1105.  Alex Krizhevsky Ilya Sutskever and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems . 1097--1105."},{"key":"e_1_3_2_1_25_1","unstructured":"Large Scale Visual Recognition Challenge. 2020. http:\/\/www.image-net.org\/challenges\/LSVRC\/. Online.  Large Scale Visual Recognition Challenge. 2020. http:\/\/www.image-net.org\/challenges\/LSVRC\/. Online."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356169"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394885.3431627"},{"key":"e_1_3_2_1_28_1","volume-title":"Automatic Structured Weight Pruning Framework of DNNs with Ultra-High Efficiency. arXiv preprint arXiv:2001.08839","author":"Li Zhengang","year":"2020","unstructured":"Zhengang Li , Yifan Gong , Xiaolong Ma , Sijia Liu , Mengshu Sun , Zheng Zhan , Zhenglun Kong , Geng Yuan , and Yanzhi Wang . 2020. SS-Auto : A Single-Shot , Automatic Structured Weight Pruning Framework of DNNs with Ultra-High Efficiency. arXiv preprint arXiv:2001.08839 ( 2020 ). Zhengang Li, Yifan Gong, Xiaolong Ma, Sijia Liu, Mengshu Sun, Zheng Zhan, Zhenglun Kong, Geng Yuan, and Yanzhi Wang. 2020. SS-Auto: A Single-Shot, Automatic Structured Weight Pruning Framework of DNNs with Ultra-High Efficiency. arXiv preprint arXiv:2001.08839 (2020)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00290"},{"key":"e_1_3_2_1_30_1","volume-title":"Rethinking the value of network pruning. arXiv preprint arXiv:1810.05270","author":"Liu Zhuang","year":"2018","unstructured":"Zhuang Liu , Mingjie Sun , Tinghui Zhou , Gao Huang , and Trevor Darrell . 2018. Rethinking the value of network pruning. arXiv preprint arXiv:1810.05270 ( 2018 ). Zhuang Liu, Mingjie Sun, Tinghui Zhou, Gao Huang, and Trevor Darrell. 2018. Rethinking the value of network pruning. arXiv preprint arXiv:1810.05270 (2018)."},{"key":"e_1_3_2_1_31_1","unstructured":"Justin Luitjens. 2013. CUDA Pro Tip: Increase Performance with Vectorized Memory Access. https:\/\/developer.nvidia.com\/blog\/cuda-pro-tip-increase-performance-with-vectorized-memory-access\/. Online.  Justin Luitjens. 2013. CUDA Pro Tip: Increase Performance with Vectorized Memory Access. https:\/\/developer.nvidia.com\/blog\/cuda-pro-tip-increase-performance-with-vectorized-memory-access\/. Online."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.541"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356156"},{"key":"e_1_3_2_1_34_1","volume-title":"Pconv: The missing but desirable sparsity in dnn weight pruning for real-time execution on mobile devices. arXiv preprint arXiv:1909.05073","author":"Ma Xiaolong","year":"2019","unstructured":"Xiaolong Ma , Fu-Ming Guo , Wei Niu , Xue Lin , Jian Tang , Kaisheng Ma , Bin Ren , and Yanzhi Wang . 2019 a. Pconv: The missing but desirable sparsity in dnn weight pruning for real-time execution on mobile devices. arXiv preprint arXiv:1909.05073 (2019). Xiaolong Ma, Fu-Ming Guo, Wei Niu, Xue Lin, Jian Tang, Kaisheng Ma, Bin Ren, and Yanzhi Wang. 2019a. Pconv: The missing but desirable sparsity in dnn weight pruning for real-time execution on mobile devices. arXiv preprint arXiv:1909.05073 (2019)."},{"key":"e_1_3_2_1_35_1","volume-title":"BLK-REW: A Unified Block-based DNN Pruning Framework using Reweighted Regularization Method. arXiv preprint arXiv:2001.08357","author":"Ma Xiaolong","year":"2020","unstructured":"Xiaolong Ma , Zhengang Li , Yifan Gong , Tianyun Zhang , Wei Niu , Zheng Zhan , Pu Zhao , Jian Tang , Xue Lin , Bin Ren , and Yanzhi Wang . 2020a. BLK-REW: A Unified Block-based DNN Pruning Framework using Reweighted Regularization Method. arXiv preprint arXiv:2001.08357 ( 2020 ). Xiaolong Ma, Zhengang Li, Yifan Gong, Tianyun Zhang, Wei Niu, Zheng Zhan, Pu Zhao, Jian Tang, Xue Lin, Bin Ren, and Yanzhi Wang. 2020a. BLK-REW: A Unified Block-based DNN Pruning Framework using Reweighted Regularization Method. arXiv preprint arXiv:2001.08357 (2020)."},{"key":"e_1_3_2_1_36_1","volume-title":"Zhengang Li, Deliang Fan, Xuehai Qian, et al.","author":"Ma Xiaolong","year":"2021","unstructured":"Xiaolong Ma , Sheng Lin , Shaokai Ye , Zhezhi He , Linfeng Zhang , Geng Yuan , Sia Huat Tan , Zhengang Li, Deliang Fan, Xuehai Qian, et al. 2021 . Non-Structured DNN Weight Pruning--Is It Beneficial in Any Platform? IEEE Transactions on Neural Networks and Learning Systems ( 2021). Xiaolong Ma, Sheng Lin, Shaokai Ye, Zhezhi He, Linfeng Zhang, Geng Yuan, Sia Huat Tan, Zhengang Li, Deliang Fan, Xuehai Qian, et al. 2021. Non-Structured DNN Weight Pruning--Is It Beneficial in Any Platform? IEEE Transactions on Neural Networks and Learning Systems (2021)."},{"key":"e_1_3_2_1_37_1","volume-title":"Zhengang Li, Deliang Fan, Xuehai Qian, Xue Lin, Kaisheng Ma, and Yanzhi Wang.","author":"Ma Xiaolong","year":"2019","unstructured":"Xiaolong Ma , Sheng Lin , Shaokai Ye , Zhezhi He , Linfeng Zhang , Geng Yuan , Sia Huat Tan , Zhengang Li, Deliang Fan, Xuehai Qian, Xue Lin, Kaisheng Ma, and Yanzhi Wang. 2019 b. Non-Structured DNN Weight Pruning -- Is It Beneficial in Any Platform ? [arxiv]cs.LG\/1907.02124 Xiaolong Ma, Sheng Lin, Shaokai Ye, Zhezhi He, Linfeng Zhang, Geng Yuan, Sia Huat Tan, Zhengang Li, Deliang Fan, Xuehai Qian, Xue Lin, Kaisheng Ma, and Yanzhi Wang. 2019b. Non-Structured DNN Weight Pruning -- Is It Beneficial in Any Platform? [arxiv]cs.LG\/1907.02124"},{"key":"e_1_3_2_1_38_1","volume-title":"An Image Enhancing Pattern-based Sparsity for Real-time Inference on Mobile Devices. arXiv preprint arXiv:2001.07710","author":"Ma Xiaolong","year":"2020","unstructured":"Xiaolong Ma , Wei Niu , Tianyun Zhang , Sijia Liu , Fu-Ming Guo , Sheng Lin , Hongjia Li , Xiang Chen , Jian Tang , Kaisheng Ma , Bin Ren , and Yanzhi Wang . 2020b. An Image Enhancing Pattern-based Sparsity for Real-time Inference on Mobile Devices. arXiv preprint arXiv:2001.07710 ( 2020 ). Xiaolong Ma, Wei Niu, Tianyun Zhang, Sijia Liu, Fu-Ming Guo, Sheng Lin, Hongjia Li, Xiang Chen, Jian Tang, Kaisheng Ma, Bin Ren, and Yanzhi Wang. 2020b. An Image Enhancing Pattern-based Sparsity for Real-time Inference on Mobile Devices. arXiv preprint arXiv:2001.07710 (2020)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASP-DAC47756.2020.9045658"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/NANOARCH47378.2019.181304"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISQED.2018.8357306"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-9868.2007.00627.x"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01152"},{"key":"e_1_3_2_1_44_1","volume-title":"Pruning convolutional neural networks for resource efficient inference. arXiv preprint arXiv:1611.06440","author":"Molchanov Pavlo","year":"2016","unstructured":"Pavlo Molchanov , Stephen Tyree , Tero Karras , Timo Aila , and Jan Kautz . 2016. Pruning convolutional neural networks for resource efficient inference. arXiv preprint arXiv:1611.06440 ( 2016 ). Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila, and Jan Kautz. 2016. Pruning convolutional neural networks for resource efficient inference. arXiv preprint arXiv:1611.06440 (2016)."},{"key":"e_1_3_2_1_45_1","volume-title":"GPU Technology Conference.","author":"Naumov M","year":"2010","unstructured":"M Naumov , LS Chien , P Vandermersch , and U Kapasi . 2010 . Cusparse library . In GPU Technology Conference. M Naumov, LS Chien, P Vandermersch, and U Kapasi. 2010. Cusparse library. In GPU Technology Conference."},{"key":"e_1_3_2_1_46_1","volume-title":"Achieving Real-Time Execution of Transformer-based Large-scale Models on Mobile with Compiler-aware Neural Architecture Optimization. arXiv preprint arXiv:2009.06823","author":"Niu Wei","year":"2020","unstructured":"Wei Niu , Zhenglun Kong , Geng Yuan , Weiwen Jiang , Jiexiong Guan , Caiwen Ding , Pu Zhao , Sijia Liu , Bin Ren , and Yanzhi Wang . 2020a. Achieving Real-Time Execution of Transformer-based Large-scale Models on Mobile with Compiler-aware Neural Architecture Optimization. arXiv preprint arXiv:2009.06823 ( 2020 ). Wei Niu, Zhenglun Kong, Geng Yuan, Weiwen Jiang, Jiexiong Guan, Caiwen Ding, Pu Zhao, Sijia Liu, Bin Ren, and Yanzhi Wang. 2020a. Achieving Real-Time Execution of Transformer-based Large-scale Models on Mobile with Compiler-aware Neural Architecture Optimization. arXiv preprint arXiv:2009.06823 (2020)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378534"},{"key":"e_1_3_2_1_48_1","unstructured":"ntel Xeon E5-2680 v4 vs Qualcomm SM8150 Snapdragon 855. 2020. https:\/\/gadgetversus.com\/processor\/intel-xeon-e5-2680-v4-vs-qualcomm-sm8150-snapdragon-855\/. Online.  ntel Xeon E5-2680 v4 vs Qualcomm SM8150 Snapdragon 855. 2020. https:\/\/gadgetversus.com\/processor\/intel-xeon-e5-2680-v4-vs-qualcomm-sm8150-snapdragon-855\/. Online."},{"key":"e_1_3_2_1_49_1","first-page":"31","article-title":"Cublas library. NVIDIA Corporation, Santa Clara","volume":"15","author":"Nvidia CUDA","year":"2008","unstructured":"CUDA Nvidia . 2008 . Cublas library. NVIDIA Corporation, Santa Clara , California 15 , 27 (2008), 31 . CUDA Nvidia. 2008. Cublas library. NVIDIA Corporation, Santa Clara, California 15, 27 (2008), 31.","journal-title":"California"},{"key":"e_1_3_2_1_50_1","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga Alban Desmaison Andreas K\u00f6pf Edward Yang Zachary DeVito Martin Raison Alykhan Tejani Sasank Chilamkurthy Benoit Steiner Lu Fang Junjie Bai and Soumith Chintala. 2019. PyTorch: An imperative style high-performance deep learning library. In Advances in Neural Information Processing Systems. 8024--8035.  Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga Alban Desmaison Andreas K\u00f6pf Edward Yang Zachary DeVito Martin Raison Alykhan Tejani Sasank Chilamkurthy Benoit Steiner Lu Fang Junjie Bai and Soumith Chintala. 2019. PyTorch: An imperative style high-performance deep learning library. In Advances in Neural Information Processing Systems . 8024--8035."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3199605"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414648"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3168831"},{"key":"e_1_3_2_1_54_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 ( 2014 ). Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_55_1","volume-title":"Frontera: The evolution of leadership computing at the national science foundation. In Practice and Experience in Advanced Research Computing. 106--111.","author":"Stanzione Dan","year":"2020","unstructured":"Dan Stanzione , John West , R Todd Evans , Tommy Minyard , Omar Ghattas , and Dhabaleswar K Panda . 2020 . Frontera: The evolution of leadership computing at the national science foundation. In Practice and Experience in Advanced Research Computing. 106--111. Dan Stanzione, John West, R Todd Evans, Tommy Minyard, Omar Ghattas, and Dhabaleswar K Panda. 2020. Frontera: The evolution of leadership computing at the national science foundation. In Practice and Experience in Advanced Research Computing. 106--111."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783273"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178491"},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"32","author":"Wang Yanzhi","year":"2018","unstructured":"Yanzhi Wang , Caiwen Ding , Zhe Li , Geng Yuan , Siyu Liao , Xiaolong Ma , Bo Yuan , Xuehai Qian , Jian Tang , Qinru Qiu , 2018 a. Towards ultra-high performance and energy efficiency of deep learning systems: an algorithm-hardware co-optimization framework . In Proceedings of the AAAI Conference on Artificial Intelligence , Vol. 32 . Yanzhi Wang, Caiwen Ding, Zhe Li, Geng Yuan, Siyu Liao, Xiaolong Ma, Bo Yuan, Xuehai Qian, Jian Tang, Qinru Qiu, et al. 2018a. Towards ultra-high performance and energy efficiency of deep learning systems: an algorithm-hardware co-optimization framework. In Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 32."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6910"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.643"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015676"},{"key":"e_1_3_2_1_63_1","volume-title":"Rethinking the smaller-norm-less-informative assumption in channel pruning of convolution layers. arXiv preprint arXiv:1802.00124","author":"Ye Jianbo","year":"2018","unstructured":"Jianbo Ye , Xin Lu , Zhe Lin , and James Z Wang . 2018. Rethinking the smaller-norm-less-informative assumption in channel pruning of convolution layers. arXiv preprint arXiv:1802.00124 ( 2018 ). Jianbo Ye, Xin Lu, Zhe Lin, and James Z Wang. 2018. Rethinking the smaller-norm-less-informative assumption in channel pruning of convolution layers. arXiv preprint arXiv:1802.00124 (2018)."},{"key":"e_1_3_2_1_64_1","volume-title":"Drawing early-bird tickets: Towards more efficient training of deep networks. arXiv preprint arXiv:1909.11957","author":"You Haoran","year":"2019","unstructured":"Haoran You , Chaojian Li , Pengfei Xu , Yonggan Fu , Yue Wang , Xiaohan Chen , Richard G Baraniuk , Zhangyang Wang , and Yingyan Lin . 2019a. Drawing early-bird tickets: Towards more efficient training of deep networks. arXiv preprint arXiv:1909.11957 ( 2019 ). Haoran You, Chaojian Li, Pengfei Xu, Yonggan Fu, Yue Wang, Xiaohan Chen, Richard G Baraniuk, Zhangyang Wang, and Yingyan Lin. 2019a. Drawing early-bird tickets: Towards more efficient training of deep networks. arXiv preprint arXiv:1909.11957 (2019)."},{"key":"e_1_3_2_1_65_1","unstructured":"Zhonghui You Kun Yan Jinmian Ye Meng Ma and Ping Wang. 2019b. Gate decorator: Global filter pruning method for accelerating deep convolutional neural networks. In Advances in Neural Information Processing Systems. 2133--2144.  Zhonghui You Kun Yan Jinmian Ye Meng Ma and Ping Wang. 2019b. Gate decorator: Global filter pruning method for accelerating deep convolutional neural networks. In Advances in Neural Information Processing Systems . 2133--2144."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00958"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED.2019.8824944"},{"key":"e_1_3_2_1_68_1","volume-title":"A SOT-MRAM-based Processing-In-Memory Engine for Highly Compressed DNN Implementation. arXiv preprint arXiv:1912.05416","author":"Yuan Geng","year":"2019","unstructured":"Geng Yuan , Xiaolong Ma , Sheng Lin , Zhengang Li , and Caiwen Ding . 2019b. A SOT-MRAM-based Processing-In-Memory Engine for Highly Compressed DNN Implementation. arXiv preprint arXiv:1912.05416 ( 2019 ). Geng Yuan, Xiaolong Ma, Sheng Lin, Zhengang Li, and Caiwen Ding. 2019b. A SOT-MRAM-based Processing-In-Memory Engine for Highly Compressed DNN Implementation. arXiv preprint arXiv:1912.05416 (2019)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_12"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3056929"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00051"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00051"},{"key":"e_1_3_2_1_73_1","unstructured":"Pu Zhao Wei Niu Geng Yuan Yuxuan Cai Hsin-Hsuan Sung Wujie Wen Sijia Liu Xipeng Shen Bin Ren Yanzhi Wang etal 2020. Achieving Real-Time LiDAR 3D Object Detection on a Mobile Device. arXiv preprint arXiv:2012.13801 (2020).  Pu Zhao Wei Niu Geng Yuan Yuxuan Cai Hsin-Hsuan Sung Wujie Wen Sijia Liu Xipeng Shen Bin Ren Yanzhi Wang et al. 2020. Achieving Real-Time LiDAR 3D Object Detection on a Mobile Device. arXiv preprint arXiv:2012.13801 (2020)."},{"key":"e_1_3_2_1_74_1","unstructured":"Zhuangwei Zhuang Mingkui Tan Bohan Zhuang Jing Liu Yong Guo Qingyao Wu Junzhou Huang and Jinhui Zhu. 2018. Discrimination-aware channel pruning for deep neural networks. In Advances in Neural Information Processing Systems. 875--886.  Zhuangwei Zhuang Mingkui Tan Bohan Zhuang Jing Liu Yong Guo Qingyao Wu Junzhou Huang and Jinhui Zhu. 2018. Discrimination-aware channel pruning for deep neural networks. In Advances in Neural Information Processing Systems . 875--886."}],"event":{"name":"ICS '21: 2021 International Conference on Supercomputing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Virtual Event USA","acronym":"ICS '21"},"container-title":["Proceedings of the ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447818.3459988","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3447818.3459988","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3447818.3459988","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:05Z","timestamp":1750193285000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447818.3459988"}},"subtitle":["efficient and accurate end-to-end deep learning training via fine-grained architecture-preserving pruning"],"short-title":[],"issued":{"date-parts":[[2021,6,3]]},"references-count":74,"alternative-id":["10.1145\/3447818.3459988","10.1145\/3447818"],"URL":"https:\/\/doi.org\/10.1145\/3447818.3459988","relation":{},"subject":[],"published":{"date-parts":[[2021,6,3]]},"assertion":[{"value":"2021-06-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}