{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:09:32Z","timestamp":1750219772415,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,7,14]],"date-time":"2023-07-14T00:00:00Z","timestamp":1689292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276153"],"award-info":[{"award-number":["62276153"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,14]]},"DOI":"10.1145\/3614008.3614020","type":"proceedings-article","created":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T18:19:52Z","timestamp":1697566792000},"page":"76-81","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Learnable Sparsity Structured Pruning for Acoustic Pre-trained Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0934-1572","authenticated-orcid":false,"given":"Siyuan","family":"Wang","sequence":"first","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9167-2459","authenticated-orcid":false,"given":"Haoyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8622-2752","authenticated-orcid":false,"given":"Jian","family":"Li","sequence":"additional","affiliation":[{"name":"Sinovoice, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3841-1959","authenticated-orcid":false,"given":"Wei-Qiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Efficient 8-bit quantization of transformer neural machine language translation model. arXiv preprint arXiv:1906.00532","author":"Bhandare Aishwarya","year":"2019","unstructured":"Aishwarya Bhandare , Vamsi Sripathi , Deepthi Karkada , Vivek Menon , Sun Choi , Kushal Datta , and Vikram Saletore . 2019. Efficient 8-bit quantization of transformer neural machine language translation model. arXiv preprint arXiv:1906.00532 ( 2019 ). Aishwarya Bhandare, Vamsi Sripathi, Deepthi Karkada, Vivek Menon, Sun Choi, Kushal Datta, and Vikram Saletore. 2019. Efficient 8-bit quantization of transformer neural machine language translation model. arXiv preprint arXiv:1906.00532 (2019)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1361"},{"key":"e_1_3_2_1_4_1","volume-title":"Universal transformers. arXiv preprint arXiv:1807.03819","author":"Dehghani Mostafa","year":"2018","unstructured":"Mostafa Dehghani , Stephan Gouws , Oriol Vinyals , Jakob Uszkoreit , and \u0141ukasz Kaiser . 2018. Universal transformers. arXiv preprint arXiv:1807.03819 ( 2018 ). Mostafa Dehghani, Stephan Gouws, Oriol Vinyals, Jakob Uszkoreit, and \u0141ukasz Kaiser. 
{"key":"e_1_3_2_1_5_1","volume-title":"Reducing transformer depth on demand with structured dropout. arXiv preprint arXiv:1909.11556","author":"Fan Angela","year":"2019","unstructured":"Angela Fan, Edouard Grave, and Armand Joulin. 2019. Reducing transformer depth on demand with structured dropout. arXiv preprint arXiv:1909.11556 (2019)."},
{"key":"e_1_3_2_1_6_1","volume-title":"Training with quantization noise for extreme model compression. arXiv preprint arXiv:2004.07320","author":"Fan Angela","year":"2020","unstructured":"Angela Fan, Pierre Stock, Benjamin Graham, Edouard Grave, R\u00e9mi Gribonval, Herve Jegou, and Armand Joulin. 2020. Training with quantization noise for extreme model compression. arXiv preprint arXiv:2004.07320 (2020)."},
{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},
{"key":"e_1_3_2_1_8_1","volume-title":"Learning both weights and connections for efficient neural network. Advances in neural information processing systems 28","author":"Han Song","year":"2015","unstructured":"Song Han, Jeff Pool, John Tran, and William Dally. 2015. Learning both weights and connections for efficient neural network. Advances in neural information processing systems 28 (2015)."},
{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},
{"key":"e_1_3_2_1_10_1","volume-title":"Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351","author":"Jiao Xiaoqi","year":"2019","unstructured":"Xiaoqi Jiao, Yichun Yin, Lifeng Shang, Xin Jiang, Xiao Chen, Linlin Li, Fang Wang, and Qun Liu. 2019. Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351 (2019)."},
{"key":"e_1_3_2_1_11_1","volume-title":"Revealing the dark secrets of BERT. arXiv preprint arXiv:1908.08593","author":"Kovaleva Olga","year":"2019","unstructured":"Olga Kovaleva, Alexey Romanov, Anna Rogers, and Anna Rumshisky. 2019. Revealing the dark secrets of BERT. arXiv preprint arXiv:1908.08593 (2019)."},
{"key":"e_1_3_2_1_12_1","volume-title":"Block pruning for faster transformers. arXiv preprint arXiv:2109.04838","author":"Lagunas Fran\u00e7ois","year":"2021","unstructured":"Fran\u00e7ois Lagunas, Ella Charlaix, Victor Sanh, and Alexander M Rush. 2021. Block pruning for faster transformers. arXiv preprint arXiv:2109.04838 (2021)."},
{"key":"e_1_3_2_1_13_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942 (2019)."},
{"key":"e_1_3_2_1_14_1","volume-title":"Snip: Single-shot network pruning based on connection sensitivity. arXiv preprint arXiv:1810.02340","author":"Lee Namhoon","year":"2018","unstructured":"Namhoon Lee, Thalaiyasingam Ajanthan, and Philip HS Torr. 2018. Snip: Single-shot network pruning based on connection sensitivity. arXiv preprint arXiv:1810.02340 (2018)."},
{"key":"e_1_3_2_1_16_1","volume-title":"Learning sparse neural networks through L_0 regularization. arXiv preprint arXiv:1712.01312","author":"Louizos Christos","year":"2017","unstructured":"Christos Louizos, Max Welling, and Diederik P Kingma. 2017. Learning sparse neural networks through L_0 regularization. arXiv preprint arXiv:1712.01312 (2017)."},
{"key":"e_1_3_2_1_17_1","volume-title":"A tensorized transformer for language modeling. Advances in neural information processing systems 32","author":"Ma Xindian","year":"2019","unstructured":"Xindian Ma, Peng Zhang, Shuai Zhang, Nan Duan, Yuexian Hou, Ming Zhou, and Dawei Song. 2019. A tensorized transformer for language modeling. Advances in neural information processing systems 32 (2019)."},
{"key":"e_1_3_2_1_18_1","volume-title":"Pruning a bert-based question answering model. arXiv preprint arXiv:1910.06360","author":"McCarley J Scott","year":"2019","unstructured":"J Scott McCarley. 2019. Pruning a bert-based question answering model. arXiv preprint arXiv:1910.06360 (2019)."},
{"key":"e_1_3_2_1_19_1","volume-title":"Are sixteen heads really better than one? Advances in neural information processing systems 32","author":"Michel Paul","year":"2019","unstructured":"Paul Michel, Omer Levy, and Graham Neubig. 2019. Are sixteen heads really better than one? Advances in neural information processing systems 32 (2019)."},
{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},
{"key":"e_1_3_2_1_21_1","first-page":"20378","article-title":"Movement pruning: Adaptive sparsity by fine-tuning","volume":"33","author":"Sanh Victor","year":"2020","unstructured":"Victor Sanh, Thomas Wolf, and Alexander Rush. 2020. Movement pruning: Adaptive sparsity by fine-tuning. Advances in Neural Information Processing Systems 33 (2020), 20378\u201320389.","journal-title":"Advances in Neural Information Processing Systems"},
{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Steffen Schneider, Alexei Baevski, Ronan Collobert, and Michael Auli. 2019. wav2vec: Unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862 (2019).","DOI":"10.21437\/Interspeech.2019-1873"},
{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6409"},
{"key":"e_1_3_2_1_25_1","volume-title":"Patient knowledge distillation for bert model compression. arXiv preprint arXiv:1908.09355","author":"Sun Siqi","year":"2019","unstructured":"Siqi Sun, Yu Cheng, Zhe Gan, and Jingjing Liu. 2019. Patient knowledge distillation for bert model compression. arXiv preprint arXiv:1908.09355 (2019)."},
{"key":"e_1_3_2_1_26_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},
{"key":"e_1_3_2_1_27_1","volume-title":"Analyzing multi-head self-attention: Specialized heads do the heavy lifting, the rest can be pruned. arXiv preprint arXiv:1905.09418","author":"Voita Elena","year":"2019","unstructured":"Elena Voita, David Talbot, Fedor Moiseev, Rico Sennrich, and Ivan Titov. 2019. Analyzing multi-head self-attention: Specialized heads do the heavy lifting, the rest can be pruned. arXiv preprint arXiv:1905.09418 (2019)."},
{"key":"e_1_3_2_1_28_1","volume-title":"Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771","author":"Wolf Thomas","year":"2019","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, et al. 2019. Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019)."},
{"key":"e_1_3_2_1_29_1","volume-title":"Superb: Speech processing universal performance benchmark. arXiv preprint arXiv:2105.01051","author":"Yang Shu-wen","year":"2021","unstructured":"Shu-wen Yang, Po-Han Chi, Yung-Sung Chuang, Cheng-I Jeff Lai, Kushal Lakhotia, Yist Y Lin, Andy T Liu, Jiatong Shi, Xuankai Chang, Guan-Ting Lin, et al. 2021. Superb: Speech processing universal performance benchmark. arXiv preprint arXiv:2105.01051 (2021)."},
{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},
{"key":"e_1_3_2_1_31_1","volume-title":"International Conference on Machine Learning. PMLR, 26809\u201326823","author":"Zhang Qingru","year":"2022","unstructured":"Qingru Zhang, Simiao Zuo, Chen Liang, Alexander Bukharin, Pengcheng He, Weizhu Chen, and Tuo Zhao. 2022. Platon: Pruning large transformer models with upper confidence bound of weight importance. In International Conference on Machine Learning. PMLR, 26809\u201326823."}
],"event":{"name":"SPML 2023: 2023 6th International Conference on Signal Processing and Machine Learning","acronym":"SPML 2023","location":"Tianjin, China"},"container-title":["2023 6th International Conference on Signal Processing and Machine Learning (SPML)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3614008.3614020","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3614008.3614020","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:27Z","timestamp":1750178247000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3614008.3614020"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,14]]},"references-count":29,"alternative-id":["10.1145\/3614008.3614020","10.1145\/3614008"],"URL":"https:\/\/doi.org\/10.1145\/3614008.3614020","relation":{},"subject":[],"published":{"date-parts":[[2023,7,14]]},"assertion":[{"value":"2023-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}