{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:09:32Z","timestamp":1750219772692,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,7,14]],"date-time":"2023-07-14T00:00:00Z","timestamp":1689292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020AAA0104500"],"award-info":[{"award-number":["2020AAA0104500"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276153"],"award-info":[{"award-number":["62276153"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,14]]},"DOI":"10.1145\/3614008.3614015","type":"proceedings-article","created":{"date-parts":[[2023,10,17]],"date-time":"2023-10-17T18:19:52Z","timestamp":1697566792000},"page":"45-50","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DistilALHuBERT: A Distilled Parameter Sharing Audio Representation Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9167-2459","authenticated-orcid":false,"given":"Haoyu","family":"Wang","sequence":"first","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0934-1572","authenticated-orcid":false,"given":"Siyuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6502-9262","authenticated-orcid":false,"given":"Yaguang","family":"Gong","sequence":"additional","affiliation":[{"name":"TAL Education, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3841-1959","authenticated-orcid":false,"given":"Wei-Qiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.393"},{"key":"e_1_3_2_1_2_1","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski , Yuhao Zhou , Abdelrahman Mohamed , and Michael Auli . 2020 . wav2vec 2.0: A framework for self-supervised learning of speech representations . Advances in Neural Information Processing Systems 33 (2020), 12449 \u2013 12460 . Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in Neural Information Processing Systems 33 (2020), 12449\u201312460.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747490"},{"key":"e_1_3_2_1_4_1","unstructured":"Sanyuan Chen Chengyi Wang Zhengyang Chen Yu Wu Shujie Liu Zhuo Chen Jinyu Li Naoyuki Kanda Takuya Yoshioka Xiong Xiao Jian Wu Long Zhou Shuo Ren Yanmin Qian Yao Qian Jian Wu Michael Zeng and Furu Wei. 2021. WavLM: Large-Scale Self-Supervised Pre-training for Full Stack Speech Processing. (2021). arXiv:2110.13900 [cs.CL]  Sanyuan Chen Chengyi Wang Zhengyang Chen Yu Wu Shujie Liu Zhuo Chen Jinyu Li Naoyuki Kanda Takuya Yoshioka Xiong Xiao Jian Wu Long Zhou Shuo Ren Yanmin Qian Yao Qian Jian Wu Michael Zeng and Furu Wei. 2021. WavLM: Large-Scale Self-Supervised Pre-training for Full Stack Speech Processing. (2021). arXiv:2110.13900 [cs.CL]"},{"key":"e_1_3_2_1_5_1","unstructured":"Po-Han Chi Pei-Hung Chung Tsung-Han Wu Chun-Cheng Hsieh Yen-Hao Chen Shang-Wen Li and Hung-yi Lee. 2021. Audio ALBERT: A Lite BERT for Self-supervised Learning of Audio Representation. http:\/\/arxiv.org\/abs\/2005.08575 arXiv:2005.08575 [cs eess].  Po-Han Chi Pei-Hung Chung Tsung-Han Wu Chun-Cheng Hsieh Yen-Hao Chen Shang-Wen Li and Hung-yi Lee. 2021. Audio ALBERT: A Lite BERT for Self-supervised Learning of Audio Representation. http:\/\/arxiv.org\/abs\/2005.08575 arXiv:2005.08575 [cs eess]."},{"key":"e_1_3_2_1_6_1","volume-title":"An unsupervised autoregressive model for speech representation learning. arXiv preprint arXiv:1904.03240","author":"Chung Yu-An","year":"2019","unstructured":"Yu-An Chung , Wei-Ning Hsu , Hao Tang , and James Glass . 2019. An unsupervised autoregressive model for speech representation learning. arXiv preprint arXiv:1904.03240 ( 2019 ). Yu-An Chung, Wei-Ning Hsu, Hao Tang, and James Glass. 2019. An unsupervised autoregressive model for speech representation learning. arXiv preprint arXiv:1904.03240 (2019)."},{"key":"#cr-split#-e_1_3_2_1_7_1.1","unstructured":"Mostafa Dehghani Stephan Gouws Oriol Vinyals Jakob Uszkoreit and \u0141ukasz Kaiser. 2019. Universal Transformers. https:\/\/doi.org\/10.48550\/arXiv.1807.03819 arXiv:1807.03819 [cs stat]. 10.48550\/arXiv.1807.03819"},{"key":"#cr-split#-e_1_3_2_1_7_1.2","unstructured":"Mostafa Dehghani Stephan Gouws Oriol Vinyals Jakob Uszkoreit and \u0141ukasz Kaiser. 2019. Universal Transformers. https:\/\/doi.org\/10.48550\/arXiv.1807.03819 arXiv:1807.03819 [cs stat]."},{"key":"e_1_3_2_1_8_1","volume-title":"CTCBERT: Advancing Hidden-unit BERT with CTC Objectives. arXiv preprint arXiv:2210.08603","author":"Fan Ruchao","year":"2022","unstructured":"Ruchao Fan , Yiming Wang , Yashesh Gaur , and Jinyu Li . 2022 . CTCBERT: Advancing Hidden-unit BERT with CTC Objectives. arXiv preprint arXiv:2210.08603 (2022). Ruchao Fan, Yiming Wang, Yashesh Gaur, and Jinyu Li. 2022. CTCBERT: Advancing Hidden-unit BERT with CTC Objectives. arXiv preprint arXiv:2210.08603 (2022)."},{"key":"e_1_3_2_1_10_1","volume-title":"What do compressed deep neural networks forget? arXiv preprint arXiv:1911.05248","author":"Hooker Sara","year":"2019","unstructured":"Sara Hooker , Aaron Courville , Gregory Clark , Yann Dauphin , and Andrea Frome . 2019. What do compressed deep neural networks forget? arXiv preprint arXiv:1911.05248 ( 2019 ). Sara Hooker, Aaron Courville, Gregory Clark, Yann Dauphin, and Andrea Frome. 2019. What do compressed deep neural networks forget? arXiv preprint arXiv:1911.05248 (2019)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10339"},{"key":"e_1_3_2_1_13_1","volume-title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations","author":"Lan Zhenzhong","year":"2020","unstructured":"Zhenzhong Lan , Mingda Chen , Sebastian Goodman , Kevin Gimpel , Piyush Sharma , and Radu Soricut . 2020 . ALBERT: A Lite BERT for Self-supervised Learning of Language Representations . http:\/\/arxiv.org\/abs\/1909.11942 arXiv:1909.11942 [cs]. Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2020. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. http:\/\/arxiv.org\/abs\/1909.11942 arXiv:1909.11942 [cs]."},{"key":"e_1_3_2_1_14_1","volume-title":"FitHuBERT: Going Thinner and Deeper for Knowledge Distillation of Speech Self-Supervised Learning. arXiv preprint arXiv:2207.00555","author":"Lee Yeonghyeon","year":"2022","unstructured":"Yeonghyeon Lee , Kangwook Jang , Jahyun Goo , Youngmoon Jung , and Hoirin Kim . 2022. FitHuBERT: Going Thinner and Deeper for Knowledge Distillation of Speech Self-Supervised Learning. arXiv preprint arXiv:2207.00555 ( 2022 ). Yeonghyeon Lee, Kangwook Jang, Jahyun Goo, Youngmoon Jung, and Hoirin Kim. 2022. FitHuBERT: Going Thinner and Deeper for Knowledge Distillation of Speech Self-Supervised Learning. arXiv preprint arXiv:2207.00555 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"Non-autoregressive predictive coding for learning speech representations from local dependencies. arXiv preprint arXiv:2011.00406","author":"Liu Alexander H","year":"2020","unstructured":"Alexander H Liu , Yu-An Chung , and James Glass . 2020. Non-autoregressive predictive coding for learning speech representations from local dependencies. arXiv preprint arXiv:2011.00406 ( 2020 ). Alexander H Liu, Yu-An Chung, and James Glass. 2020. Non-autoregressive predictive coding for learning speech representations from local dependencies. arXiv preprint arXiv:2011.00406 (2020)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3095662"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054458"},{"key":"e_1_3_2_1_18_1","volume-title":"Clifton","author":"Nouriborji Mohammadmahdi","year":"2022","unstructured":"Mohammadmahdi Nouriborji , Omid Rohanian , Samaneh Kouchaki , and David A . Clifton . 2022 . MiniALBERT: Model Distillation via Parameter-Efficient Recursive Transformers . http:\/\/arxiv.org\/abs\/2210.06425 arXiv:2210.06425 [cs]. Mohammadmahdi Nouriborji, Omid Rohanian, Samaneh Kouchaki, and David A. Clifton. 2022. MiniALBERT: Model Distillation via Parameter-Efficient Recursive Transformers. http:\/\/arxiv.org\/abs\/2210.06425 arXiv:2210.06425 [cs]."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_1_20_1","volume-title":"Antoine Chassang, Carlo Gatta, and Yoshua Bengio.","author":"Romero Adriana","year":"2014","unstructured":"Adriana Romero , Nicolas Ballas , Samira Ebrahimi Kahou , Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2014 . Fitnets : Hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014). Adriana Romero, Nicolas Ballas, Samira Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2014. Fitnets: Hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)."},{"key":"e_1_3_2_1_21_1","volume-title":"wav2vec: Unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862","author":"Schneider Steffen","year":"2019","unstructured":"Steffen Schneider , Alexei Baevski , Ronan Collobert , and Michael Auli . 2019. wav2vec: Unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862 ( 2019 ). Steffen Schneider, Alexei Baevski, Ronan Collobert, and Michael Auli. 2019. wav2vec: Unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862 (2019)."},{"key":"e_1_3_2_1_22_1","volume-title":"Kushal Lakhotia, Yist Y Lin, Andy T Liu, Jiatong Shi, Xuankai Chang","author":"Chi Po-Han","year":"2021","unstructured":"Shu-wen Yang, Po-Han Chi , Yung-Sung Chuang , Cheng-I Jeff Lai , Kushal Lakhotia, Yist Y Lin, Andy T Liu, Jiatong Shi, Xuankai Chang , Guan-Ting Lin , 2021 . Superb : Speech processing universal performance benchmark. arXiv preprint arXiv:2105.01051 (2021). Shu-wen Yang, Po-Han Chi, Yung-Sung Chuang, Cheng-I Jeff Lai, Kushal Lakhotia, Yist Y Lin, Andy T Liu, Jiatong Shi, Xuankai Chang, Guan-Ting Lin, 2021. Superb: Speech processing universal performance benchmark. arXiv preprint arXiv:2105.01051 (2021)."},{"key":"e_1_3_2_1_23_1","series-title":"Journal of physics: Conference series","volume-title":"An overview of overfitting and its solutions","author":"Ying Xue","year":"2022","unstructured":"Xue Ying . 2019. An overview of overfitting and its solutions . In Journal of physics: Conference series , Vol. 1168 . IOP Publishing , 02 2022 . Xue Ying. 2019. An overview of overfitting and its solutions. In Journal of physics: Conference series, Vol. 1168. IOP Publishing, 022022."},{"key":"e_1_3_2_1_25_1","volume-title":"Speechlm: Enhanced speech pre-training with unpaired textual data. arXiv preprint arXiv:2209.15329","author":"Zhang Ziqiang","year":"2022","unstructured":"Ziqiang Zhang , Sanyuan Chen , Long Zhou , Yu Wu , Shuo Ren , Shujie Liu , Zhuoyuan Yao , Xun Gong , Lirong Dai , Jinyu Li , 2022 . Speechlm: Enhanced speech pre-training with unpaired textual data. arXiv preprint arXiv:2209.15329 (2022). Ziqiang Zhang, Sanyuan Chen, Long Zhou, Yu Wu, Shuo Ren, Shujie Liu, Zhuoyuan Yao, Xun Gong, Lirong Dai, Jinyu Li, 2022. Speechlm: Enhanced speech pre-training with unpaired textual data. arXiv preprint arXiv:2209.15329 (2022)."},{"key":"#cr-split#-e_1_3_2_1_26_1.1","doi-asserted-by":"crossref","unstructured":"Jinming Zhao Ruichen Li Qin Jin Xinchao Wang and Haizhou Li. 2022. Memobert: Pre-Training Model with Prompt-Based Learning for Multimodal Emotion Recognition. In ICASSP 2022 - 2022 IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP). 4703-4707. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746910 ISSN: 2379-190X. 10.1109\/ICASSP43922.2022.9746910","DOI":"10.1109\/ICASSP43922.2022.9746910"},{"key":"#cr-split#-e_1_3_2_1_26_1.2","doi-asserted-by":"crossref","unstructured":"Jinming Zhao Ruichen Li Qin Jin Xinchao Wang and Haizhou Li. 2022. Memobert: Pre-Training Model with Prompt-Based Learning for Multimodal Emotion Recognition. In ICASSP 2022 - 2022 IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP). 4703-4707. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746910 ISSN: 2379-190X.","DOI":"10.1109\/ICASSP43922.2022.9746910"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3184480"},{"key":"e_1_3_2_1_28_1","volume-title":"Eng Siong Chng, and Bin Ma","author":"Zhao Yingzhu","year":"2020","unstructured":"Yingzhu Zhao , Chongjia Ni , Cheung-Chi Leung , Shafiq Joty , Eng Siong Chng, and Bin Ma . 2020 . Universal Speech Transformer. In Interspeech 2020. ISCA , 5021\u20135025. https:\/\/doi.org\/10.21437\/Interspeech.2020-1716 10.21437\/Interspeech.2020-1716 Yingzhu Zhao, Chongjia Ni, Cheung-Chi Leung, Shafiq Joty, Eng Siong Chng, and Bin Ma. 2020. Universal Speech Transformer. In Interspeech 2020. ISCA, 5021\u20135025. https:\/\/doi.org\/10.21437\/Interspeech.2020-1716"}],"event":{"name":"SPML 2023: 2023 6th International Conference on Signal Processing and Machine Learning","acronym":"SPML 2023","location":"Tianjin China"},"container-title":["2023 6th International Conference on Signal Processing and Machine Learning (SPML)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3614008.3614015","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3614008.3614015","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:27Z","timestamp":1750178247000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3614008.3614015"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,14]]},"references-count":28,"alternative-id":["10.1145\/3614008.3614015","10.1145\/3614008"],"URL":"https:\/\/doi.org\/10.1145\/3614008.3614015","relation":{},"subject":[],"published":{"date-parts":[[2023,7,14]]},"assertion":[{"value":"2023-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}