{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T03:47:17Z","timestamp":1768535237240,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62274142"],"award-info":[{"award-number":["62274142"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Science and Technology Program","award":["KJZD20230923115213027"],"award-info":[{"award-number":["KJZD20230923115213027"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3669940.3707268","type":"proceedings-article","created":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T12:28:01Z","timestamp":1738844881000},"page":"731-745","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["MVQ: Towards Efficient DNN Compression and Acceleration with Masked Vector Quantization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7726-4883","authenticated-orcid":false,"given":"Shuaiting","family":"Li","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6677-2145","authenticated-orcid":false,"given":"Chengxuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0860-4442","authenticated-orcid":false,"given":"Juncan","family":"Deng","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0985-4478","authenticated-orcid":false,"given":"Zeyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3623-3554","authenticated-orcid":false,"given":"Zewen","family":"Ye","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4240-4284","authenticated-orcid":false,"given":"Zongsheng","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5431-609X","authenticated-orcid":false,"given":"Haibin","family":"Shen","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3722-9979","authenticated-orcid":false,"given":"Kejie","family":"Huang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587","author":"Chen Liang-Chieh","year":"2017","unstructured":"Liang-Chieh Chen. Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587, 2017."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.34.118"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_4_1","volume-title":"Dkm: Differentiable k-means clustering layer for neural network compression","author":"Cho Minsik","year":"2022","unstructured":"Minsik Cho, Keivan A. Vahid, Saurabh Adya, and Mohammad Rastegari. Dkm: Differentiable k-means clustering layer for neural network compression, 2022."},{"key":"e_1_3_2_1_5_1","volume-title":"Learned step size quantization. arXiv preprint arXiv:1902.08153","author":"Esser Steven K","year":"2019","unstructured":"Steven K Esser, Jeffrey L McKinstry, Deepika Bablani, Rathinakumar Appuswamy, and Dharmendra S Modha. Learned step size quantization. arXiv preprint arXiv:1902.08153, 2019."},{"key":"e_1_3_2_1_6_1","volume-title":"Christopher KI Williams, John Winn, and AndrewZisserman. The pascal visual object classes (voc) challenge. International journal of computer vision, 88:303--338","author":"Everingham Mark","year":"2010","unstructured":"Mark Everingham, Luc Van Gool, Christopher KI Williams, John Winn, and AndrewZisserman. The pascal visual object classes (voc) challenge. International journal of computer vision, 88:303--338, 2010."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586216"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358291"},{"key":"e_1_3_2_1_9_1","volume-title":"Compressing deep convolutional networks using vector quantization. arXiv preprint arXiv:1412.6115","author":"Gong Yunchao","year":"2014","unstructured":"Yunchao Gong, Liu Liu, Ming Yang, and Lubomir Bourdev. Compressing deep convolutional networks using vector quantization. arXiv preprint arXiv:1412.6115, 2014."},{"key":"e_1_3_2_1_10_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149, 2015."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358283"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071031"},{"key":"e_1_3_2_1_13_1","volume-title":"Product quantization for nearest neighbor search","author":"Jegou Herve","year":"2010","unstructured":"Herve Jegou, Matthijs Douze, and Cordelia Schmid. Product quantization for nearest neighbor search. IEEE transactions on pattern analysis and machine intelligence, 33(1):117--128, 2010."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_15_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980, 2014."},{"key":"e_1_3_2_1_16_1","volume-title":"Pruning vs quantization: Which is better?","author":"Kuzmin Andrey","year":"2023","unstructured":"Andrey Kuzmin, Markus Nagel, Mart van Baalen, Arash Behboodi, and Tijmen Blankevoort. Pruning vs quantization: Which is better?, 2023."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2019.8702105"},{"key":"e_1_3_2_1_18_1","volume-title":"Pruning filters for efficient convnets. arXiv preprint arXiv:1608.08710","author":"Li Hao","year":"2016","unstructured":"Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet, and Hans Peter Graf. Pruning filters for efficient convnets. arXiv preprint arXiv:1608.08710, 2016."},{"key":"e_1_3_2_1_19_1","first-page":"740","volume-title":"Proceedings, Part V 13","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. Microsoft coco: Common objects in context. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part V 13, pages 740--755. Springer, 2014."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.2979965"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00049"},{"key":"e_1_3_2_1_22_1","volume-title":"Learning sparse neural networks through l_0 regularization. arXiv preprint arXiv:1712.01312","author":"Louizos Christos","year":"2017","unstructured":"Christos Louizos, Max Welling, and Diederik P Kingma. Learning sparse neural networks through l_0 regularization. arXiv preprint arXiv:1712.01312, 2017."},{"key":"e_1_3_2_1_23_1","first-page":"15699","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Martinez Julieta","year":"2021","unstructured":"Julieta Martinez, Jashan Shewakramani, Ting Wei Liu, Ioan Andrei Barsan, Wenyuan Zeng, and Raquel Urtasun. Permute, quantize, and fine-tune: Efficient compression of neural networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pages 15699--15708, June 2021."},{"key":"e_1_3_2_1_24_1","volume-title":"Jeff Pool, Darko Stosic, Dusan Stosic, Ganesh Venkatesh, Chong Yu, and Paulius Micikevicius. Accelerating sparse deep neural networks. arXiv preprint arXiv:2104.08378","author":"Mishra Asit","year":"2021","unstructured":"Asit Mishra, Jorge Albericio Latorre, Jeff Pool, Darko Stosic, Dusan Stosic, Ganesh Venkatesh, Chong Yu, and Paulius Micikevicius. Accelerating sparse deep neural networks. arXiv preprint arXiv:2104.08378, 2021."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01152"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2019.2912307"},{"key":"e_1_3_2_1_27_1","volume-title":"Comparing rewinding and fine-tuning in neural network pruning. arXiv preprint arXiv:2003.02389","author":"Renda Alex","year":"2020","unstructured":"Alex Renda, Jonathan Frankle, and Michael Carbin. Comparing rewinding and fine-tuning in neural network pruning. arXiv preprint arXiv:2003.02389, 2020."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9365939"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2019.2935251"},{"key":"e_1_3_2_1_30_1","volume-title":"An accelerator for sparse convolutional neural networks leveraging systolic general matrix-matrix multiplication. ACM Transactions on Architecture and Code Optimization (TACO), 19(3):1--26","author":"Soltaniyeh Mohammadreza","year":"2022","unstructured":"Mohammadreza Soltaniyeh, Richard P Martin, and Santosh Nagarakatte. An accelerator for sparse convolutional neural networks leveraging systolic general matrix-matrix multiplication. ACM Transactions on Architecture and Code Optimization (TACO), 19(3):1--26, 2022."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_14"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.vlsi.2017.02.002"},{"key":"e_1_3_2_1_33_1","volume-title":"And the bit goes down: Revisiting the quantization of neural networks. arXiv preprint arXiv:1907.05686","author":"Stock Pierre","year":"2019","unstructured":"Pierre Stock, Armand Joulin, R\u00e9mi Gribonval, Benjamin Graham, and Herv\u00e9 J\u00e9gou. And the bit goes down: Revisiting the quantization of neural networks. arXiv preprint arXiv:1907.05686, 2019."},{"key":"e_1_3_2_1_34_1","volume-title":"et al. Dominosearch: Find layer-wise fine-grained n: M sparse schemes from dense neural networks. Advances in neural information processing systems, 34:20721--20732","author":"Sun Wei","year":"2021","unstructured":"Wei Sun, Aojun Zhou, Sander Stuijk, Rob Wijnhoven, Andrew O Nelson, Henk Corporaal, et al. Dominosearch: Find layer-wise fine-grained n: M sparse schemes from dense neural networks. Advances in neural information processing systems, 34:20721--20732, 2021."},{"key":"e_1_3_2_1_35_1","article-title":"An energy-efficient cnn accelerator with enhanced weight stationary dataflow","author":"Wang Chengxuan","year":"2024","unstructured":"Chengxuan Wang, Zongsheng Wang, Shuaiting Li, Yuanming Zhang, Haibin Shen, and Kejie Huang. Ews: An energy-efficient cnn accelerator with enhanced weight stationary dataflow. IEEE Transactions on Circuits and Systems II: Express Briefs, 2024.","journal-title":"IEEE Transactions on Circuits and Systems II: Express Briefs"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.521"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/AICAS.2019.8771469"},{"key":"e_1_3_2_1_38_1","volume-title":"Learning n: m fine-grained structured sparse neural networks from scratch. arXiv preprint arXiv:2102.04010","author":"Zhou Aojun","year":"2021","unstructured":"Aojun Zhou, Yukun Ma, Junnan Zhu, Jianbo Liu, Zhijie Zhang, Kun Yuan, Wenxiu Sun, and Hongsheng Li. Learning n: m fine-grained structured sparse neural networks from scratch. arXiv preprint arXiv:2102.04010, 2021."}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707268","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3669940.3707268","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T14:47:06Z","timestamp":1755787626000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707268"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":38,"alternative-id":["10.1145\/3669940.3707268","10.1145\/3669940"],"URL":"https:\/\/doi.org\/10.1145\/3669940.3707268","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}