{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T01:12:47Z","timestamp":1773277967875,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"National Key R&D Program of China","award":["2021ZD0110104"],"award-info":[{"award-number":["2021ZD0110104"]}]},{"name":"National Natural Science Foundation of China (NSFC)","award":["62222210"],"award-info":[{"award-number":["62222210"]}]},{"name":"National Natural Science Foundation of China (NSFC)","award":["U21B2017"],"award-info":[{"award-number":["U21B2017"]}]},{"name":"National Natural Science Foundation of China (NSFC)","award":["62072297"],"award-info":[{"award-number":["62072297"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620666.3651351","type":"proceedings-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T12:08:21Z","timestamp":1713960501000},"page":"416-430","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Fractal: Joint Multi-Level Sparse Pattern Tuning of Accuracy and Performance for DNN Pruning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7433-2627","authenticated-orcid":false,"given":"Yue","family":"Guan","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qizhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4175-6927","authenticated-orcid":false,"given":"Changming","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qizhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3652-5437","authenticated-orcid":false,"given":"Yangjie","family":"Zhou","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qizhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5660-5493","authenticated-orcid":false,"given":"Jingwen","family":"Leng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qizhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6218-4659","authenticated-orcid":false,"given":"Chao","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qizhi Institute, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0034-2302","authenticated-orcid":false,"given":"Minyi","family":"Guo","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"},{"name":"Shanghai Qizhi Institute, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"CUTLASS. https:\/\/github.com\/NVIDIA\/cutlass January."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-41321-1_2"},{"key":"e_1_3_2_1_4_1","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. TVM: An automated End-to-End optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pages 578--594, Carlsbad, CA, October 2018. USENIX Association."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476182"},{"key":"e_1_3_2_1_6_1","volume-title":"A survey on deep neural network pruning-taxonomy, comparison, analysis, and recommendations. arXiv preprint arXiv:2308.06767","author":"Cheng Hongrong","year":"2023","unstructured":"Hongrong Cheng, Miao Zhang, and Javen Qinfeng Shi. A survey on deep neural network pruning-taxonomy, comparison, analysis, and recommendations. arXiv preprint arXiv:2308.06767, 2023."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_2_1_8_1","volume-title":"Pixelated butterfly: Simple and efficient sparse training for neural network models. arXiv preprint arXiv:2112.00029","author":"Dao Tri","year":"2021","unstructured":"Tri Dao, Beidi Chen, Kaizhao Liang, Jiaming Yang, Zhao Song, Atri Rudra, and Christopher Re. Pixelated butterfly: Simple and efficient sparse training for neural network models. arXiv preprint arXiv:2112.00029, 2021."},{"key":"e_1_3_2_1_9_1","first-page":"1517","volume-title":"International conference on machine learning","author":"Dao Tri","year":"2019","unstructured":"Tri Dao, Albert Gu, Matthew Eichhorn, Atri Rudra, and Christopher R\u00e9. Learning fast algorithms for linear transforms using butterfly factorizations. In International conference on machine learning, pages 1517--1527. PMLR, 2019."},{"key":"e_1_3_2_1_10_1","volume-title":"Spqr: A sparse-quantized representation for near-lossless llm weight compression. arXiv preprint arXiv:2306.03078","author":"Dettmers Tim","year":"2023","unstructured":"Tim Dettmers, Ruslan Svirschevski, Vage Egiazarian, Denis Kuznedelev, Elias Frantar, Saleh Ashkboos, Alexander Borzunov, Torsten Hoefler, and Dan Alistarh. Spqr: A sparse-quantized representation for near-lossless llm weight compression. arXiv preprint arXiv:2306.03078, 2023."},{"key":"e_1_3_2_1_11_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_12_1","volume-title":"Third International Workshop on Paraphrasing (IWP2005)","author":"Dolan Bill","year":"2005","unstructured":"Bill Dolan and Chris Brockett. Automatically constructing a corpus of sentential paraphrases. In Third International Workshop on Paraphrasing (IWP2005), 2005."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3576933"},{"key":"e_1_3_2_1_14_1","volume-title":"Perforatedcnns: Acceleration through elimination of redundant convolutions. Advances in neural information processing systems","author":"Figurnov Mikhail","year":"2016","unstructured":"Mikhail Figurnov, Aizhan Ibraimova, Dmitry P Vetrov, and Pushmeet Kohli. Perforatedcnns: Acceleration through elimination of redundant convolutions. Advances in neural information processing systems, 2016."},{"key":"e_1_3_2_1_15_1","first-page":"10323","volume-title":"International Conference on Machine Learning","author":"Frantar Elias","year":"2023","unstructured":"Elias Frantar and Dan Alistarh. Sparsegpt: Massive language models can be accurately pruned in one-shot. In International Conference on Machine Learning, pages 10323--10337. PMLR, 2023."},{"key":"e_1_3_2_1_16_1","volume-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323, 2022."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis.","author":"Gale Trevor","unstructured":"Trevor Gale, Matei Zaharia, Cliff Young, and Erich Elsen. Sparse GPU kernels for deep learning. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"e_1_3_2_1_19_1","unstructured":"Scott Gray Alec Radford and Diederik P Kingma. Gpu kernels for block-sparse weights."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.502"},{"key":"e_1_3_2_1_21_1","first-page":"10710","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"36","author":"Guan Yue","year":"2022","unstructured":"Yue Guan, Zhengyi Li, Zhouhan Lin, Yuhao Zhu, Jingwen Leng, and Minyi Guo. Block-skim: Efficient question answering for transformer. In Proceedings of the AAAI Conference on Artificial Intelligence, volume 36, pages 10710--10719, 2022."},{"key":"e_1_3_2_1_22_1","volume-title":"et al. Amanda: Unified instrumentation framework for deep neural networks","author":"Guan Yue","year":"2023","unstructured":"Yue Guan, Yuxian Qiu, Jingwen Leng, Fan Yang, Shuo Yu, Yunxin Liu, Yu Feng, Yuhao Zhu, Lidong Zhou, Yun Liang, et al. Amanda: Unified instrumentation framework for deep neural networks. 2023."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433722"},{"key":"e_1_3_2_1_24_1","volume-title":"Squant: On-the-fly data-free quantization via diagonal hessian approximation. arXiv preprint arXiv:2202.07471","author":"Guo Cong","year":"2022","unstructured":"Cong Guo, Yuxian Qiu, Jingwen Leng, Xiaotian Gao, Chen Zhang, Yunxin Liu, Fan Yang, Yuhao Zhu, and Minyi Guo. Squant: On-the-fly data-free quantization via diagonal hessian approximation. arXiv preprint arXiv:2202.07471, 2022."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589038"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2024.3365942"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_28_1","volume-title":"Structured pruning for deep convolutional neural networks: A survey. arXiv preprint arXiv:2303.00566","author":"He Yang","year":"2023","unstructured":"Yang He and Lingao Xiao. Structured pruning for deep convolutional neural networks: A survey. arXiv preprint arXiv:2303.00566, 2023."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.155"},{"key":"e_1_3_2_1_30_1","volume-title":"Yang Yang, and Yanqi Zhou. Deep learning scaling is predictable, empirically. arXiv preprint arXiv:1712.00409","author":"Hestness Joel","year":"2017","unstructured":"Joel Hestness, Sharan Narang, Newsha Ardalani, Gregory Diamos, Heewoo Jun, Hassan Kianinejad, Md Mostofa Ali Patwary, Yang Yang, and Yanqi Zhou. Deep learning scaling is predictable, empirically. arXiv preprint arXiv:1712.00409, 2017."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/3546258.3546499"},{"key":"e_1_3_2_1_32_1","volume-title":"Using code perforation to improve performance, reduce energy consumption, and respond to failures","author":"Hoffmann Henry","year":"2009","unstructured":"Henry Hoffmann, Sasa Misailovic, Stelios Sidiroglou, Anant Agarwal, and Martin Rinard. Using code perforation to improve performance, reduce energy consumption, and respond to failures. 2009."},{"key":"e_1_3_2_1_33_1","volume-title":"Accelerated sparse neural training: A provable and efficient method to find n: m transposable masks. Advances in neural information processing systems, 34:21099--21111","author":"Hubara Itay","year":"2021","unstructured":"Itay Hubara, Brian Chmiel, Moshe Island, Ron Banner, Joseph Naor, and Daniel Soudry. Accelerated sparse neural training: A provable and efficient method to find n: m transposable masks. Advances in neural information processing systems, 34:21099--21111, 2021."},{"key":"e_1_3_2_1_34_1","unstructured":"Yangqing Jia. Learning semantic image representations at a large scale."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_36_1","volume-title":"Scaling laws for neural language models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361, 2020."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2011.311"},{"key":"e_1_3_2_1_39_1","volume-title":"Layer-adaptive sparsity for the magnitude-based pruning. arXiv preprint arXiv:2010.07611","author":"Lee Jaeho","year":"2020","unstructured":"Jaeho Lee, Sejun Park, Sangwoo Mo, Sungsoo Ahn, and Jinwoo Shin. Layer-adaptive sparsity for the magnitude-based pruning. arXiv preprint arXiv:2010.07611, 2020."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571934"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3205289.3205317"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.045"},{"key":"e_1_3_2_1_43_1","first-page":"881","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Ma Lingxiao","year":"2020","unstructured":"Lingxiao Ma, Zhiqiang Xie, Zhi Yang, Jilong Xue, Youshan Miao, Wei Cui, Wenxiang Hu, Fan Yang, Lintao Zhang, and Lidong Zhou. Rammer: Enabling holistic deep learning compiler optimizations with {rTasks}. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20), pages 881--897, 2020."},{"key":"e_1_3_2_1_44_1","unstructured":"Asit Mishra Jorge Albericio Latorre Jeff Pool Darko Stosic Dusan Stosic Ganesh Venkatesh Chong Yu and Paulius Micikevicius. Accelerating sparse deep neural networks. arXiv preprint arXiv:2104.08378."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-018-04316-3"},{"key":"e_1_3_2_1_46_1","volume-title":"Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort. A white paper on neural network quantization. arXiv preprint arXiv:2106.08295","author":"Nagel Markus","year":"2021","unstructured":"Markus Nagel, Marios Fournarakis, Rana Ali Amjad, Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort. A white paper on neural network quantization. arXiv preprint arXiv:2106.08295, 2021."},{"key":"e_1_3_2_1_47_1","volume-title":"Block-sparse recurrent neural networks. arXiv preprint arXiv:1711.02782","author":"Narang Sharan","year":"2017","unstructured":"Sharan Narang, Eric Undersander, and Gregory Diamos. Block-sparse recurrent neural networks. arXiv preprint arXiv:1711.02782, 2017."},{"key":"e_1_3_2_1_48_1","volume-title":"GPU Technology Conference","author":"Naumov Maxim","year":"2010","unstructured":"Maxim Naumov, L Chien, Philippe Vandermersch, and Ujval Kapasi. Cusparse library. In GPU Technology Conference, 2010."},{"key":"e_1_3_2_1_49_1","volume-title":"cusparselt: A high-performance cuda library for sparse matrix-matrix multiplication","author":"NVIDIA.","year":"2021","unstructured":"NVIDIA. cusparselt: A high-performance cuda library for sparse matrix-matrix multiplication, 2021."},{"key":"e_1_3_2_1_50_1","volume-title":"cublas","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. cublas, 2023."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_2_1_52_1","unstructured":"Amit Sabne. Xla : Compiling machine learning for peak performance."},{"key":"e_1_3_2_1_53_1","first-page":"20378","article-title":"Adaptive sparsity by fine-tuning","volume":"33","author":"Sanh Victor","year":"2020","unstructured":"Victor Sanh, Thomas Wolf, and Alexander Rush. Movement pruning: Adaptive sparsity by fine-tuning. Advances in Neural Information Processing Systems, 33:20378--20389, 2020.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_54_1","first-page":"35783","article-title":"Tensor program optimization with probabilistic programs","volume":"35","author":"Shao Junru","year":"2022","unstructured":"Junru Shao, Xiyou Zhou, Siyuan Feng, Bohan Hou, Ruihang Lai, Hongyi Jin, Wuwei Lin, Masahiro Masuda, Cody Hao Yu, and Tianqi Chen. Tensor program optimization with probabilistic programs. Advances in Neural Information Processing Systems, 35:35783--35796, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_55_1","volume-title":"Proc. ACM Program. Lang., 3(OOPSLA).","author":"Sharif Hashim","unstructured":"Hashim Sharif, Prakalp Srivastava, Muhammad Huzaifa, Maria Kotsifakou, Keyur Joshi, Yasmin Sarita, Nathan Zhao, Vikram S. Adve, Sasa Misailovic, and Sarita Adve. Approxhpvm: a portable compiler ir for accuracy-aware optimizations. Proc. ACM Program. Lang., 3(OOPSLA)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3446108"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2025113.2025133"},{"key":"e_1_3_2_1_58_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_59_1","volume-title":"A simple and effective pruning approach for large language models. arXiv preprint arXiv:2306.11695","author":"Sun Mingjie","year":"2023","unstructured":"Mingjie Sun, Zhuang Liu, Anna Bair, and J Zico Kolter. A simple and effective pruning approach for large language models. arXiv preprint arXiv:2306.11695, 2023."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-50743-5_16"},{"key":"e_1_3_2_1_62_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. Attention is all you need. Advances in neural information processing systems."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.5555\/1413370.1413402"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_2_1_65_1","volume-title":"15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)","author":"Wang Haojie","year":"2021","unstructured":"Haojie Wang, Jidong Zhai, Mingyu Gao, Zixuan Ma, Shizhi Tang, Liyan Zheng, Yuanzhi Li, Kaiyuan Rong, Yuanyong Chen, and Zhihao Jia. {PET}: Optimizing tensor programs with partially equivalent transformations and automated corrections. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21), 2021."},{"key":"e_1_3_2_1_66_1","unstructured":"Haojun Xia Zhen Zheng Yuchao Li Donglin Zhuang Zhongzhu Zhou Xiafei Qiu Yong Li Wei Lin and Shuaiwen Leon Song. Flash-llm: Enabling cost-effective and highly-efficient large generative model inference with unstructured sparsity. arXiv preprint arXiv:2309.10285."},{"key":"e_1_3_2_1_67_1","unstructured":"Mengzhou Xia Zexuan Zhong and Danqi Chen. Structured pruning learns compact and accurate models. arXiv preprint arXiv:2204.00408."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582047"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"e_1_3_2_1_70_1","volume-title":"A survey of large language models. arXiv preprint arXiv:2303.18223","author":"Zhao Wayne Xin","year":"2023","unstructured":"Wayne Xin Zhao, Kun Zhou, Junyi Li, Tianyi Tang, Xiaolei Wang, Yupeng Hou, Yingqian Min, Beichen Zhang, Junjie Zhang, Zican Dong, et al. A survey of large language models. arXiv preprint arXiv:2303.18223, 2023."},{"key":"e_1_3_2_1_71_1","first-page":"863","volume-title":"14th USENIX symposium on operating systems design and implementation (OSDI 20)","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, et al. Ansor: Generating {High-Performance} tensor programs for deep learning. In 14th USENIX symposium on operating systems design and implementation (OSDI 20), pages 863--879, 2020."},{"key":"e_1_3_2_1_72_1","volume-title":"USENIX Symposium on Operating Systems Design and Implementation (OSDI)","author":"Zheng Ningxin","year":"2022","unstructured":"Ningxin Zheng, Bin Lin, Quanlu Zhang, Lingxiao Ma, Yuqing Yang, Fan Yang, Yang Wang, Mao Yang, and Lidong Zhou. SparTA: Deep-Learning model sparsity via Tensor-with-Sparsity-Attribute. In USENIX Symposium on Operating Systems Design and Implementation (OSDI), 2022."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651351","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:43Z","timestamp":1750291423000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651351"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":72,"alternative-id":["10.1145\/3620666.3651351","10.1145\/3620666"],"URL":"https:\/\/doi.org\/10.1145\/3620666.3651351","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}