{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:07:04Z","timestamp":1765339624492,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754988","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:56:43Z","timestamp":1761371803000},"page":"219-228","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["From Continuous to Discrete: Cross-Domain Collaborative General Speech Enhancement via Hierarchical Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7393-0425","authenticated-orcid":false,"given":"Zhaoxi","family":"Mu","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, Shaanxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6873-1683","authenticated-orcid":false,"given":"Rilin","family":"Chen","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4094-8448","authenticated-orcid":false,"given":"Andong","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Acoustics, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0031-9156","authenticated-orcid":false,"given":"Meng","family":"Yu","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Bellevue, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5117-4914","authenticated-orcid":false,"given":"Xinyu","family":"Yang","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, Shaanxi, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0520-6844","authenticated-orcid":false,"given":"Dong","family":"Yu","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Bellevue, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"2776","article-title":"Hi-Fi Multi-Speaker English TTS Dataset","author":"Bakhturina Evelina","year":"2021","unstructured":"Evelina Bakhturina, Vitaly Lavrukhin, Boris Ginsburg, and Yang Zhang. 2021. Hi-Fi Multi-Speaker English TTS Dataset. In Interspeech. ISCA, 2776-2780.","journal-title":"Interspeech. ISCA"},{"key":"e_1_3_2_1_2_1","first-page":"79","article-title":"Data Augmentation and Loss Normalization for Deep Noise Suppression. In SPECOM (Lecture Notes in Computer Science, Vol. 12335)","author":"Braun Sebastian","year":"2020","unstructured":"Sebastian Braun and Ivan Tashev. 2020. Data Augmentation and Loss Normalization for Deep Noise Suppression. In SPECOM (Lecture Notes in Computer Science, Vol. 12335). Springer, 79-86.","journal-title":"Springer"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Sanyuan Chen Chengyi Wang Yu Wu Ziqiang Zhang Long Zhou Shujie Liu Zhuo Chen Yanqing Liu Huaming Wang Jinyu Li et al. 2025. Neural codec language models are zero-shot text to speech synthesizers. IEEE Transactions on Audio Speech and Language Processing (2025).","DOI":"10.1109\/TASLPRO.2025.3530270"},{"key":"e_1_3_2_1_5_1","article-title":"High Fidelity Neural Audio","volume":"2023","author":"D\u00e9fossez Alexandre","year":"2023","unstructured":"Alexandre D\u00e9fossez, Jade Copet, Gabriel Synnaeve, and Yossi Adi. 2023. High Fidelity Neural Audio Compression. Trans. Mach. Learn. Res., Vol. 2023 (2023).","journal-title":"Compression. Trans. Mach. Learn. 
Res."},{"key":"e_1_3_2_1_6_1","first-page":"3291","article-title":"Real Time Speech Enhancement in the Waveform Domain","author":"D\u00e9fossez Alexandre","year":"2020","unstructured":"Alexandre D\u00e9fossez, Gabriel Synnaeve, and Yossi Adi. 2020. Real Time Speech Enhancement in the Waveform Domain. In INTERSPEECH. ISCA, 3291-3295.","journal-title":"INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_7_1","first-page":"2533","article-title":"PLCMOS - A Data-driven Non-intrusive Metric for The Evaluation of Packet Loss Concealment Algorithms","author":"Diener Lorenz","year":"2023","unstructured":"Lorenz Diener, Marju Purin, Sten Sootla, Ando Saabas, Robert Aichner, and Ross Cutler. 2023. PLCMOS - A Data-driven Non-intrusive Metric for The Evaluation of Packet Loss Concealment Algorithms. In INTERSPEECH. ISCA, 2533-2537.","journal-title":"INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_8_1","volume-title":"INTERSPEECH 2022 Audio Deep Packet Loss Concealment Challenge. In INTERSPEECH. ISCA, 580-584","author":"Diener Lorenz","year":"2022","unstructured":"Lorenz Diener, Sten Sootla, Solomiya Branets, Ando Saabas, Robert Aichner, and Ross Cutler. 2022. INTERSPEECH 2022 Audio Deep Packet Loss Concealment Challenge. In INTERSPEECH. ISCA, 580-584."},{"key":"e_1_3_2_1_9_1","unstructured":"Harishchandra Dubey Ashkan Aazami Vishak Gopal Babak Naderi Sebastian Braun Ross Cutler Alex Ju Mehdi Zohourian Min Tang Mehrsa Golestaneh et al. 2024. Icassp 2023 deep noise suppression challenge. IEEE Open Journal of Signal Processing (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3133208"},{"key":"e_1_3_2_1_11_1","first-page":"6633","article-title":"Fullsubnet","author":"Hao Xiang","year":"2021","unstructured":"Xiang Hao, Xiangdong Su, Radu Horaud, and Xiaofei Li. 2021. Fullsubnet: A Full-Band and Sub-Band Fusion Model for Real-Time Single-Channel Speech Enhancement. In ICASSP. IEEE, 6633-6637.","journal-title":"In ICASSP. 
IEEE"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"e_1_3_2_1_13_1","volume-title":"WavTokenizer: an Efficient Acoustic Discrete Codec Tokenizer for Audio Language Modeling. CoRR","author":"Ji Shengpeng","year":"2024","unstructured":"Shengpeng Ji, Ziyue Jiang, Xize Cheng, Yifu Chen, Minghui Fang, Jialong Zuo, Qian Yang, Ruiqi Li, Ziang Zhang, Xiaoda Yang, Rongjie Huang, Yidi Jiang, Qian Chen, Siqi Zheng, Wen Wang, and Zhou Zhao. 2024. WavTokenizer: an Efficient Acoustic Discrete Codec Tokenizer for Audio Language Modeling. CoRR, Vol. abs\/2408.16532 (2024)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Boyi Kang Xinfa Zhu Zihan Zhang Zhen Ye Mingshuai Liu Ziqian Wang Yike Zhu Guobin Ma Jun Chen Longshuai Xiao et al. 2025. LLaSE-G1: Incentivizing Generalization Capability for LLaMA-based Speech Enhancement. arXiv preprint arXiv:2503.00493 (2025).","DOI":"10.18653\/v1\/2025.acl-long.651"},{"key":"e_1_3_2_1_15_1","unstructured":"Rithesh Kumar Prem Seetharaman Alejandro Luebs Ishaan Kumar and Kundan Kumar. 2023. High-Fidelity Audio Compression with Improved RVQGAN. In NeurIPS."},{"key":"e_1_3_2_1_16_1","volume-title":"Tianyu Fan, and Eng Siong Chng.","author":"Li Haoyang","year":"2025","unstructured":"Haoyang Li, Jia Qi Yip, Tianyu Fan, and Eng Siong Chng. 2025. Speech Enhancement Using Continuous Embeddings of Neural Audio Codec. CoRR, Vol. abs\/2502.16240 (2025)."},{"key":"e_1_3_2_1_17_1","first-page":"585","article-title":"End-to-End Multi-Loss Training for Low Delay Packet Loss Concealment","author":"Li Nan","year":"2022","unstructured":"Nan Li, Xiguang Zheng, Chen Zhang, Liang Guo, and Bing Yu. 2022. End-to-End Multi-Loss Training for Low Delay Packet Loss Concealment. In INTERSPEECH. ISCA, 585-589.","journal-title":"INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_18_1","volume-title":"MaskSR: Masked Language Model for Full-band Speech Restoration. 
CoRR","author":"Li Xu","year":"2024","unstructured":"Xu Li, Qirui Wang, and Xiaoyu Liu. 2024. MaskSR: Masked Language Model for Full-band Speech Restoration. CoRR, Vol. abs\/2406.02092 (2024)."},{"key":"e_1_3_2_1_19_1","first-page":"575","article-title":"PLCNet","author":"Liu Baiyun","year":"2022","unstructured":"Baiyun Liu, Qi Song, Mingxue Yang, Wuwen Yuan, and Tianbao Wang. 2022b. PLCNet: Real-time Packet Loss Concealment with Semi-supervised Generative Adversarial Network. In INTERSPEECH. ISCA, 575-579.","journal-title":"In INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_20_1","first-page":"1076","article-title":"Audiosr: Versatile Audio Super-Resolution at Scale","author":"Liu Haohe","year":"2024","unstructured":"Haohe Liu, Ke Chen, Qiao Tian, Wenwu Wang, and Mark D. Plumbley. 2024. Audiosr: Versatile Audio Super-Resolution at Scale. In ICASSP. IEEE, 1076-1080.","journal-title":"ICASSP. IEEE"},{"key":"e_1_3_2_1_21_1","first-page":"4232","article-title":"VoiceFixer","author":"Liu Haohe","year":"2022","unstructured":"Haohe Liu, Xubo Liu, Qiuqiang Kong, Qiao Tian, Yan Zhao, DeLiang Wang, Chuanzeng Huang, and Yuxuan Wang. 2022a. VoiceFixer: A Unified Framework for High-Fidelity Speech Restoration. In INTERSPEECH. ISCA, 4232-4236.","journal-title":"A Unified Framework for High-Fidelity Speech Restoration. In INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_22_1","unstructured":"Ziyin Liu Tilman Hartwig and Masahito Ueda. 2020. Neural Networks Fail to Learn Periodic Functions and How to Fix It. In NeurIPS."},{"key":"e_1_3_2_1_23_1","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In ICLR (Poster). OpenReview.net."},{"key":"e_1_3_2_1_24_1","first-page":"481","article-title":"Music Source Separation With Band-Split Rope Transformer","author":"Lu Wei Tsung","year":"2024","unstructured":"Wei Tsung Lu, Ju-Chiang Wang, Qiuqiang Kong, and Yun-Ning Hung. 2024. Music Source Separation With Band-Split Rope Transformer. In ICASSP. 
IEEE, 481-485.","journal-title":"ICASSP. IEEE"},{"key":"e_1_3_2_1_25_1","first-page":"3834","article-title":"MP-SENet: A Speech Enhancement Model with Parallel Denoising of Magnitude and Phase Spectra","author":"Lu Ye-Xin","year":"2023","unstructured":"Ye-Xin Lu, Yang Ai, and Zhen-Hua Ling. 2023. MP-SENet: A Speech Enhancement Model with Parallel Denoising of Magnitude and Phase Spectra. In INTERSPEECH. ISCA, 3834-3838.","journal-title":"INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_26_1","first-page":"2127","article-title":"NISQA","author":"Mittag Gabriel","year":"2021","unstructured":"Gabriel Mittag, Babak Naderi, Assmaa Chehadi, and Sebastian M\u00f6ller. 2021. NISQA: A Deep CNN-Self-Attention Model for Multidimensional Speech Quality Prediction with Crowdsourced Datasets. In Interspeech. ISCA, 2127-2131.","journal-title":"In Interspeech. ISCA"},{"key":"e_1_3_2_1_27_1","first-page":"886","article-title":"Dnsmos P.835: A Non-Intrusive Perceptual Objective Speech Quality Metric to Evaluate Noise Suppressors","author":"Reddy Chandan K. A.","year":"2022","unstructured":"Chandan K. A. Reddy, Vishak Gopal, and Ross Cutler. 2022. Dnsmos P.835: A Non-Intrusive Perceptual Objective Speech Quality Metric to Evaluate Noise Suppressors. In ICASSP. IEEE, 886-890.","journal-title":"ICASSP. IEEE"},{"key":"e_1_3_2_1_28_1","volume-title":"The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets, Subjective Testing Framework, and Challenge Results. In INTERSPEECH. ISCA, 2492-2496","author":"Reddy Chandan K. A.","year":"2020","unstructured":"Chandan K. A. Reddy, Vishak Gopal, Ross Cutler, Ebrahim Beyrami, Roger Cheng, Harishchandra Dubey, Sergiy Matusevych, Robert Aichner, Ashkan Aazami, Sebastian Braun, Puneet Rana, Sriram Srinivasan, and Johannes Gehrke. 2020. The INTERSPEECH 2020 Deep Noise Suppression Challenge: Datasets, Subjective Testing Framework, and Challenge Results. In INTERSPEECH. 
ISCA, 2492-2496."},{"key":"e_1_3_2_1_29_1","volume-title":"SpeechBERTScore: Reference-Aware Automatic Evaluation of Speech Generation Leveraging NLP Evaluation Metrics. CoRR","author":"Saeki Takaaki","year":"2024","unstructured":"Takaaki Saeki, Soumi Maiti, Shinnosuke Takamichi, Shinji Watanabe, and Hiroshi Saruwatari. 2024. SpeechBERTScore: Reference-Aware Automatic Evaluation of Speech Generation Leveraging NLP Evaluation Metrics. CoRR, Vol. abs\/2401.16812 (2024)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"e_1_3_2_1_31_1","volume-title":"LLaMA: Open and Efficient Foundation Language Models. CoRR","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, Aur\u00e9lien Rodriguez, Armand Joulin, Edouard Grave, and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. CoRR, Vol. abs\/2302.13971 (2023)."},{"key":"e_1_3_2_1_32_1","first-page":"570","article-title":"Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model","author":"Valin Jean-Marc","year":"2022","unstructured":"Jean-Marc Valin, Ahmed Mustafa, Christopher Montgomery, Timothy B. Terriberry, Michael Klingbeil, Paris Smaragdis, and Arvindh Krishnaswamy. 2022. Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model. In INTERSPEECH. ISCA, 570-574.","journal-title":"INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_33_1","first-page":"15","article-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit. University of Edinburgh","volume":"6","author":"Veaux Christophe","year":"2017","unstructured":"Christophe Veaux, Junichi Yamagishi, Kirsten MacDonald, et al., 2017. CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit. University of Edinburgh. 
The Centre for Speech Technology Research (CSTR), Vol. 6 (2017), 15.","journal-title":"The Centre for Speech Technology Research (CSTR)"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3304482"},{"key":"e_1_3_2_1_35_1","first-page":"11561","article-title":"SELM: Speech Enhancement using Discrete Tokens and Language Models","author":"Wang Ziqian","year":"2024","unstructured":"Ziqian Wang, Xinfa Zhu, Zihan Zhang, Yuanjun Lv, Ning Jiang, Guoqing Zhao, and Lei Xie. 2024. SELM: Speech Enhancement using Discrete Tokens and Language Models. In ICASSP. IEEE, 11561-11565.","journal-title":"ICASSP. IEEE"},{"key":"e_1_3_2_1_36_1","first-page":"1368","article-title":"WHAM!: Extending Speech Separation to Noisy Environments","author":"Wichern Gordon","year":"2019","unstructured":"Gordon Wichern, Joe Antognini, Michael Flynn, Licheng Richard Zhu, Emmett McQuinn, Dwight Crow, Ethan Manilow, and Jonathan Le Roux. 2019. WHAM!: Extending Speech Separation to Noisy Environments. In INTERSPEECH. ISCA, 1368-1372.","journal-title":"INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_37_1","first-page":"4784","article-title":"Vision Transformer with Deformable Attention","author":"Xia Zhuofan","year":"2022","unstructured":"Zhuofan Xia, Xuran Pan, Shiji Song, Li Erran Li, and Gao Huang. 2022. Vision Transformer with Deformable Attention. In CVPR. IEEE, 4784-4793.","journal-title":"CVPR. IEEE"},{"key":"e_1_3_2_1_38_1","volume-title":"BigCodec: Pushing the Limits of Low-Bitrate Neural Speech Codec. CoRR","author":"Xin Detai","year":"2024","unstructured":"Detai Xin, Xu Tan, Shinnosuke Takamichi, and Hiroshi Saruwatari. 2024. BigCodec: Pushing the Limits of Low-Bitrate Neural Speech Codec. CoRR, Vol. 
abs\/2409.05377 (2024)."},{"key":"e_1_3_2_1_39_1","first-page":"1170","article-title":"Genhancer: High-fidelity speech enhancement via generative modeling on discrete codec tokens","volume":"2024","author":"Yang Haici","year":"2024","unstructured":"Haici Yang, Jiaqi Su, Minje Kim, and Zeyu Jin. 2024. Genhancer: High-fidelity speech enhancement via generative modeling on discrete codec tokens. In Proc. Interspeech 2024. 1170-1174.","journal-title":"Proc. Interspeech"},{"key":"e_1_3_2_1_40_1","volume-title":"Chng Eng Siong, and Lei Xie","author":"Yao Jixun","year":"2025","unstructured":"Jixun Yao, Hexin Liu, Chen Chen, Yuchen Hu, Chng Eng Siong, and Lei Xie. 2025. GenSE: Generative Speech Enhancement via Language Models using Hierarchical Modeling. CoRR, Vol. abs\/2502.02942 (2025)."},{"key":"e_1_3_2_1_41_1","volume-title":"Llasa: Scaling Train-Time and Inference-Time Compute for Llama-based Speech Synthesis. CoRR","author":"Ye Zhen","year":"2025","unstructured":"Zhen Ye, Xinfa Zhu, Chi-Min Chan, Xinsheng Wang, Xu Tan, Jiahe Lei, Yi Peng, Haohe Liu, Yizhu Jin, Zheqi Dai, Hongzhan Lin, Jianyi Chen, Xingjian Du, Liumeng Xue, Yunlin Chen, Zhifei Li, Lei Xie, Qiuqiang Kong, Yike Guo, and Wei Xue. 2025. Llasa: Scaling Train-Time and Inference-Time Compute for Llama-based Speech Synthesis. CoRR, Vol. abs\/2502.04128 (2025)."},{"key":"e_1_3_2_1_42_1","volume-title":"Eng Siong Chng, and Bin Ma","author":"Yip Jia Qi","year":"2024","unstructured":"Jia Qi Yip, Shengkui Zhao, Dianwen Ng, Eng Siong Chng, and Bin Ma. 2024. Towards audio codec-based speech separation. arXiv preprint arXiv:2406.12434 (2024)."},{"key":"e_1_3_2_1_43_1","first-page":"1","article-title":"Efficient Monaural Speech Enhancement with Universal Sample Rate Band-Split RNN","author":"Yu Jianwei","year":"2023","unstructured":"Jianwei Yu and Yi Luo. 2023. Efficient Monaural Speech Enhancement with Universal Sample Rate Band-Split RNN. In ICASSP. IEEE, 1-5.","journal-title":"ICASSP. 
IEEE"},{"key":"e_1_3_2_1_44_1","volume-title":"Joshua Ainslie, Chris Alberti, Santiago Onta\u00f1\u00f3n","author":"Zaheer Manzil","year":"2020","unstructured":"Manzil Zaheer, Guru Guruganesh, Kumar Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Onta\u00f1\u00f3n, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, and Amr Ahmed. 2020. Big Bird: Transformers for Longer Sequences. In NeurIPS."},{"key":"e_1_3_2_1_45_1","first-page":"1526","article-title":"LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech","author":"Zen Heiga","year":"2019","unstructured":"Heiga Zen, Viet Dang, Rob Clark, Yu Zhang, Ron J. Weiss, Ye Jia, Zhifeng Chen, and Yonghui Wu. 2019. LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech. In INTERSPEECH. ISCA, 1526-1530.","journal-title":"INTERSPEECH. ISCA"},{"key":"e_1_3_2_1_46_1","volume-title":"AnyEnhance: A Unified Generative Model with Prompt-Guidance and Self-Critic for Voice Enhancement. CoRR","author":"Zhang Junan","year":"2025","unstructured":"Junan Zhang, Jing Yang, Zihao Fang, Yuancheng Wang, Zehua Zhang, Zhuo Wang, Fan Fan, and Zhizheng Wu. 2025. AnyEnhance: A Unified Generative Model with Prompt-Guidance and Self-Critic for Voice Enhancement. CoRR, Vol. abs\/2501.15417 (2025)."},{"key":"e_1_3_2_1_47_1","volume-title":"URGENT Challenge: Universality, Robustness, and Generalizability For Speech Enhancement. CoRR","author":"Zhang Wangyou","year":"2024","unstructured":"Wangyou Zhang, Robin Scheibler, Kohei Saijo, Samuele Cornell, Chenda Li, Zhaoheng Ni, Anurag Kumar, Jan Pirklbauer, Marvin Sach, Shinji Watanabe, Tim Fingscheidt, and Yanmin Qian. 2024a. URGENT Challenge: Universality, Robustness, and Generalizability For Speech Enhancement. CoRR, Vol. abs\/2406.04660 (2024)."},{"key":"e_1_3_2_1_48_1","volume-title":"Bs-Plcnet: Band-Split Packet Loss Concealment Network with Multi-Task Learning Framework and Multi-Discriminators. In ICASSP Workshops. 
IEEE, 23-24","author":"Zhang Zihan","year":"2024","unstructured":"Zihan Zhang, Jiayao Sun, Xianjun Xia, Chuanzeng Huang, Yijian Xiao, and Lei Xie. 2024b. Bs-Plcnet: Band-Split Packet Loss Concealment Network with Multi-Task Learning Framework and Multi-Discriminators. In ICASSP Workshops. IEEE, 23-24."},{"key":"e_1_3_2_1_49_1","first-page":"9281","article-title":"FRCRN","author":"Zhao Shengkui","year":"2022","unstructured":"Shengkui Zhao, Bin Ma, Karn N. Watcharasupat, and Woon-Seng Gan. 2022. FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement. In ICASSP. IEEE, 9281-9285.","journal-title":"In ICASSP. IEEE"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754988","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:03:42Z","timestamp":1765339422000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754988"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":49,"alternative-id":["10.1145\/3746027.3754988","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754988","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}