{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T11:05:37Z","timestamp":1779793537022,"version":"3.53.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T00:00:00Z","timestamp":1773792000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T00:00:00Z","timestamp":1773792000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100014718","name":"Innovative Research Group Project of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62332019"],"award-info":[{"award-number":["62332019"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","award":["2023YFF1203900"],"award-info":[{"award-number":["2023YFF1203900"]}],"id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","award":["2023YFF1203903"],"award-info":[{"award-number":["2023YFF1203903"]}],"id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005090","name":"Beijing Nova Program","doi-asserted-by":"publisher","award":["20240484513"],"award-info":[{"award-number":["20240484513"]}],"id":[{"id":"10.13039\/501100005090","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s10489-026-07150-z","type":"journal-article","created":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T01:50:33Z","timestamp":1773798633000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DBMIF: a deep balanced multimodal iterative fusion framework for air- and bone-conduction speech enhancement"],"prefix":"10.1007","volume":"56","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-3205-6266","authenticated-orcid":false,"given":"Yilei","family":"Wu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Changyan","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xingyu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yakun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chengshi","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shuang","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ye","family":"Yan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Erwei","family":"Yin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,3,18]]},"reference":[{"issue":"10","key":"7150_CR1","doi-asserted-by":"publisher","first-page":"1702","DOI":"10.1109\/TASLP.2018.2853117","volume":"26","author":"D Wang","year":"2018","unstructured":"Wang D, Chen J (2018) Supervised speech separation based on deep learning: An overview. IEEE\/ACM transactions on audio speech and language processing 26(10):1702\u20131726. https:\/\/doi.org\/10.1109\/TASLP.2018.2853117","journal-title":"IEEE\/ACM transactions on audio speech and language processing"},{"issue":"5","key":"7150_CR2","doi-asserted-by":"publisher","first-page":"2751","DOI":"10.1121\/10.0019341","volume":"153","author":"EW Healy","year":"2023","unstructured":"Healy EW, Johnson EM, Pandey A, Wang D (2023) Progress made in the efficacy and viability of deep-learning-based noise reduction. The Journal of the Acoustical Society of America 153(5):2751\u20132751. https:\/\/doi.org\/10.1121\/10.0019341","journal-title":"The Journal of the Acoustical Society of America"},{"key":"7150_CR3","doi-asserted-by":"publisher","DOI":"10.1109\/THMS.2025.3585165","author":"D Zhou","year":"2025","unstructured":"Zhou D, Zhang Y, Wu J, Zhang X, Xie L, Yin E (2025) Ave speech: A comprehensive multi-modal dataset for speech recognition integrating audio, visual, and electromyographic signals. IEEE Transactions on Human-Machine Systems. https:\/\/doi.org\/10.1109\/THMS.2025.3585165","journal-title":"IEEE Transactions on Human-Machine Systems"},{"key":"7150_CR4","doi-asserted-by":"publisher","first-page":"725","DOI":"10.1109\/OJSP.2024.3378602","volume":"5","author":"H Dubey","year":"2024","unstructured":"Dubey H, Aazami A, Gopal V, Naderi B, Braun S, Cutler R, Ju A, Zohourian M, Tang M, Golestaneh M et al (2024) Icassp 2023 deep noise suppression challenge. IEEE Open J Signal Proc 5:725\u2013737. https:\/\/doi.org\/10.1109\/OJSP.2024.3378602","journal-title":"IEEE Open J Signal Proc"},{"issue":"18","key":"7150_CR5","doi-asserted-by":"publisher","first-page":"5050","DOI":"10.3390\/s20185050","volume":"20","author":"Y Zhou","year":"2020","unstructured":"Zhou Y, Chen Y, Ma Y, Liu H (2020) A real-time dual-microphone speech enhancement algorithm assisted by bone conduction sensor. Sensors 20(18):5050. https:\/\/doi.org\/10.3390\/s20185050","journal-title":"Sensors"},{"key":"7150_CR6","doi-asserted-by":"publisher","unstructured":"Huang B, Gong Y, Sun J, Shen Y (2017) A wearable bone-conducted speech enhancement system for strong background noises. In: 2017 18th International Conference on Electronic Packaging Technology (ICEPT), pp 1682\u20131684. https:\/\/doi.org\/10.1109\/ICEPT.2017.8046759","DOI":"10.1109\/ICEPT.2017.8046759"},{"issue":"1","key":"7150_CR7","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1159\/000314282","volume":"16","author":"T Ito","year":"2010","unstructured":"Ito T, R\u00f6\u00f6sli C, Kim C, Sim J, Huber A, Probst R (2010) Bone conduction thresholds and skull vibration measured on the teeth during stimulation at different sites on the human head. Audiology and Neurotology 16(1):12\u201322. https:\/\/doi.org\/10.1159\/000314282","journal-title":"Audiology and Neurotology"},{"key":"7150_CR8","doi-asserted-by":"publisher","unstructured":"Nakajima Y, Kashioka H, Shikano K, Campbell N (2003) Non-audible murmur recognition input interface using stethoscopic microphone attached to the skin. In: 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings. (ICASSP\u201903) 5:708. https:\/\/doi.org\/10.1109\/ICASSP.2003.1200069","DOI":"10.1109\/ICASSP.2003.1200069"},{"key":"7150_CR9","doi-asserted-by":"publisher","unstructured":"Kondo K, Fujita T, Nakagawa K (2006) On equalization of bone conducted speech for improved speech quality. In: 2006 IEEE international symposium on signal processing and information technology, pp 426\u2013431. https:\/\/doi.org\/10.1109\/ISSPIT.2006.270839","DOI":"10.1109\/ISSPIT.2006.270839"},{"key":"7150_CR10","doi-asserted-by":"publisher","unstructured":"Pan Q, Pan Y, Zhou J, Wang H, Tao L, Kwan HK (2022) Cyclegan with dual adversarial loss for bone-conducted speech enhancement. In: TENCON 2022\u20132022 IEEE Region 10 Conference (TENCON), pp 1\u20134. https:\/\/doi.org\/10.1109\/TENCON55691.2022.9977942","DOI":"10.1109\/TENCON55691.2022.9977942"},{"key":"7150_CR11","doi-asserted-by":"publisher","unstructured":"Hauret J, Joubaud T, Zimpfer V, Bavu \u00c9 (2023) Eben: Extreme bandwidth extension network applied to speech signals captured with noise-resilient body-conduction microphones. In: ICASSP 2023\u20132023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096301","DOI":"10.1109\/ICASSP49357.2023.10096301"},{"key":"7150_CR12","doi-asserted-by":"publisher","unstructured":"Hao B, Zhou D, Li X, Zhang X, Xie L, Wu J, Yin E (2025) Lipgen: Viseme-guided lip video generation for enhancing visual speech recognition. In: IEEE international conference on acoustics, speech, and signal processing, pp 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49660.2025.10889163","DOI":"10.1109\/ICASSP49660.2025.10889163"},{"key":"7150_CR13","unstructured":"Dusan SV, Lindahl A, Andersen EB (2016) System and method of mixing accelerometer and microphone signals to improve voice quality in a mobile device. Google Patents. US Patent 9,363,596"},{"key":"7150_CR14","doi-asserted-by":"crossref","unstructured":"Lee C-H, Rao, BD, Garudadri H (2018) Bone-conduction sensor assisted noise estimation for improved speech enhancement. In: Interspeech, 2018:1180","DOI":"10.21437\/Interspeech.2018-1046"},{"key":"7150_CR15","doi-asserted-by":"publisher","first-page":"1035","DOI":"10.1109\/LSP.2020.3000968","volume":"27","author":"C Yu","year":"2020","unstructured":"Yu C, Hung K-H, Wang S-S, Tsao Y, Hung J (2020) Time-domain multi-modal bone\/air conducted speech enhancement. IEEE Signal Proc Letters 27:1035\u20131039. https:\/\/doi.org\/10.1109\/LSP.2020.3000968","journal-title":"IEEE Signal Proc Letters"},{"key":"7150_CR16","doi-asserted-by":"publisher","first-page":"3134","DOI":"10.1109\/TASLP.2022.3209943","volume":"30","author":"H Wang","year":"2022","unstructured":"Wang H, Zhang X, Wang D (2022) Fusing bone-conduction and air-conduction sensors for complex-domain speech enhancement. IEEE\/ACM transactions on audio speech and language processing 30:3134\u20133143. https:\/\/doi.org\/10.1109\/TASLP.2022.3209943","journal-title":"IEEE\/ACM transactions on audio speech and language processing"},{"issue":"2","key":"7150_CR17","doi-asserted-by":"publisher","first-page":"1355","DOI":"10.1121\/10.0028339","volume":"156","author":"K Kuang","year":"2024","unstructured":"Kuang K, Yang F, Yang J (2024) A lightweight speech enhancement network fusing bone-and air-conducted speech. The Journal of the Acoustical Society of America 156(2):1355\u20131366. https:\/\/doi.org\/10.1121\/10.0028339","journal-title":"The Journal of the Acoustical Society of America"},{"key":"7150_CR18","doi-asserted-by":"publisher","unstructured":"Tan K, Wang D (2018) A convolutional recurrent neural network for real-time speech enhancement. In: Interspeech, 2018:3229\u20133233. https:\/\/doi.org\/10.21437\/Interspeech.2018-1405","DOI":"10.21437\/Interspeech.2018-1405"},{"key":"7150_CR19","doi-asserted-by":"publisher","unstructured":"Hu Y, Liu Y, Lv S, Xing M, Zhang S, Fu Y, Wu J, Zhang B, Xie L (2020) Dccrn: Deep complex convolution recurrent network for phase-aware speech enhancement. Interspeech. https:\/\/doi.org\/10.21437\/interspeech.2020-2537","DOI":"10.21437\/interspeech.2020-2537"},{"key":"7150_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2021.108499","volume":"187","author":"A Li","year":"2022","unstructured":"Li A, Zheng C, Zhang L, Li X (2022) Glance and gaze: A collaborative learning framework for single-channel speech enhancement. Appl Acoust 187:108499. https:\/\/doi.org\/10.1016\/j.apacoust.2021.108499","journal-title":"Appl Acoust"},{"key":"7150_CR21","doi-asserted-by":"publisher","unstructured":"Pascual S, Bonafonte A, Serr\u00e0 J (2017) Segan: Speech enhancement generative adversarial network. Interspeech, 3642:(2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-1428","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"7150_CR22","doi-asserted-by":"publisher","unstructured":"D\u00e9fossez A, Synnaeve G, Adi Y (2020) Real time speech enhancement in the waveform domain. Interspeech. https:\/\/doi.org\/10.21437\/Interspeech.2020-2773","DOI":"10.21437\/Interspeech.2020-2773"},{"key":"7150_CR23","doi-asserted-by":"publisher","first-page":"1587969","DOI":"10.3389\/frsip.2025.1587969","volume":"5","author":"H Sato","year":"2025","unstructured":"Sato H, Ochiai T, Delcroix M, Moriya T, Ashihara T, Masumura R (2025) Generic speech enhancement with self-supervised representation space loss. Frontiers in Signal Processing 5:1587969","journal-title":"Frontiers in Signal Processing"},{"key":"7150_CR24","doi-asserted-by":"publisher","unstructured":"Shimamura T, Tamiya T (2005) A reconstruction filter for bone-conducted speech. In: 48th midwest symposium on circuits and systems, 2005:1847\u20131850. https:\/\/doi.org\/10.1109\/MWSCAS.2005.1594483. IEEE","DOI":"10.1109\/MWSCAS.2005.1594483"},{"issue":"9","key":"7150_CR25","doi-asserted-by":"publisher","first-page":"2505","DOI":"10.1109\/TASL.2012.2205241","volume":"20","author":"T Toda","year":"2012","unstructured":"Toda T, Nakagiri M, Shikano K (2012) Statistical voice conversion techniques for body-conducted unvoiced speech enhancement. IEEE Trans Audio Speech Lang Process 20(9):2505\u20132517. https:\/\/doi.org\/10.1109\/TASL.2012.2205241","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"7150_CR26","unstructured":"Shin HS, Kang H-G, Fingscheidt T (2012) Survey of speech enhancement supported by a bone conduction microphone. In: Speech Communication; 10. ITG Symposium, pp 1\u20134. VDE"},{"key":"7150_CR27","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1016\/j.specom.2018.06.002","volume":"104","author":"H-P Liu","year":"2018","unstructured":"Liu H-P, Tsao Y, Fuh C-S (2018) Bone-conducted speech enhancement using deep denoising autoencoder. Speech Commun 104:106\u2013112. https:\/\/doi.org\/10.1016\/j.specom.2018.06.002","journal-title":"Speech Commun"},{"issue":"7","key":"7150_CR28","doi-asserted-by":"publisher","first-page":"1316","DOI":"10.1109\/TASL.2009.2016733","volume":"17","author":"E Erzin","year":"2009","unstructured":"Erzin E (2009) Improving throat microphone speech recognition by joint analysis of throat and acoustic microphone recordings. IEEE Trans Audio Speech Lang Process 17(7):1316\u20131324. https:\/\/doi.org\/10.1109\/TASL.2009.2016733","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"7150_CR29","doi-asserted-by":"publisher","unstructured":"Zheng C, Zhang X, Sun M, Yang J, Xing Y (2018) A novel throat microphone speech enhancement framework based on deep blstm recurrent neural networks. In: 2018 IEEE 4th International Conference on Computer and Communications (ICCC), pp 1258\u20131262. https:\/\/doi.org\/10.1109\/CompComm.2018.8780872","DOI":"10.1109\/CompComm.2018.8780872"},{"issue":"12","key":"7150_CR30","doi-asserted-by":"publisher","first-page":"2001","DOI":"10.1587\/transfun.E102.A.2001","volume":"102","author":"C Zheng","year":"2019","unstructured":"Zheng C, Cao T, Yang J, Zhang X, Sun M (2019) Spectra restoration of bone-conducted speech via attention-based contextual information and spectro-temporal structure constraint. IEICE Trans Fundam Electron Commun Comput Sci 102(12):2001\u20132007. https:\/\/doi.org\/10.1587\/transfun.E102.A.2001","journal-title":"IEICE Trans Fundam Electron Commun Comput Sci"},{"key":"7150_CR31","doi-asserted-by":"publisher","unstructured":"Pan Q, Zhou J, Gao T, Tao L (2020) Bone-conducted speech to air-conducted speech conversion based on cycleconsistent adversarial networks. In: 2020 IEEE 3rd International Conference on Information Communication and Signal Processing (ICICSP), pp 168\u2013172. https:\/\/doi.org\/10.1109\/ICICSP50920.2020.9232121","DOI":"10.1109\/ICICSP50920.2020.9232121"},{"issue":"3","key":"7150_CR32","doi-asserted-by":"publisher","first-page":"153","DOI":"10.3390\/a16030153","volume":"16","author":"L Cheng","year":"2023","unstructured":"Cheng L, Dou Y, Zhou J, Wang H, Tao L (2023) Speaker-independent spectral enhancement for bone-conducted speech. Algorithms 16(3):153. https:\/\/doi.org\/10.3390\/a16030153","journal-title":"Algorithms"},{"key":"7150_CR33","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1109\/TASLP.2023.3337988","volume":"32","author":"C Li","year":"2023","unstructured":"Li C, Yang F, Yang J (2023) A two-stage approach to quality restoration of bone-conducted speech. IEEE\/ACM Transactions on Audio Speech and Language Processing 32:818\u2013829. https:\/\/doi.org\/10.1109\/TASLP.2023.3337988","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"issue":"12","key":"7150_CR34","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TASL.2013.2274696","volume":"21","author":"T Dekens","year":"2013","unstructured":"Dekens T, Verhelst W (2013) Body conducted speech enhancement by equalization and signal fusion. IEEE Trans Audio Speech Lang Process 21(12):2481\u20132492. https:\/\/doi.org\/10.1109\/TASL.2013.2274696","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"7150_CR35","doi-asserted-by":"crossref","unstructured":"Lai RL, Hou J-C, Chern I-C, Hung K-H, Chen Y-T, Gogate M, Arslan T, Hussain A, Lin C-W, Tsao Y (2025) Leveraging self-supervised audio-visual pretrained models to improve vocoded speech intelligibility in cochlear implant simulation. IEEE Trans Biomed Eng","DOI":"10.1109\/TBME.2025.3610284"},{"key":"7150_CR36","doi-asserted-by":"crossref","unstructured":"Kim Y, Chung Y (2025) Modality-specific speech enhancement and noise-adaptive fusion for acoustic and body-conduction microphone framework","DOI":"10.21437\/Interspeech.2025-2581"},{"issue":"1","key":"7150_CR37","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1109\/78.277808","volume":"42","author":"TQ Nguyen","year":"2002","unstructured":"Nguyen TQ (2002) Near-perfect-reconstruction pseudo-qmf banks. IEEE Trans Signal Process 42(1):65\u201376. https:\/\/doi.org\/10.1109\/78.277808","journal-title":"IEEE Trans Signal Process"},{"key":"7150_CR38","doi-asserted-by":"publisher","unstructured":"Ni J, Bai Y, Zhang W, Yao T, Mei T (2023) Deep equilibrium multimodal fusion. arXiv preprint arXiv:2306.16645, https:\/\/doi.org\/10.48550\/arXiv.2306.16645","DOI":"10.48550\/arXiv.2306.16645"},{"key":"7150_CR39","unstructured":"Kumar K, Kumar R, De Boissiere T, Gestin L, Teoh WZ, Sotelo J, De Brebisson A, Bengio Y, Courville AC (2019) Melgan: Generative adversarial networks for conditional waveform synthesis. Advances in neural information processing systems 32"},{"key":"7150_CR40","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1109\/TASLP.2022.3224305","volume":"31","author":"M Wang","year":"2022","unstructured":"Wang M, Chen J, Zhang X-L, Rahardja S (2022) End-to-end multi-modal speech recognition on an air and bone conducted speech corpus. IEEE\/ACM Transactions on Audio Speech and Language Processing 31:513\u2013524. https:\/\/doi.org\/10.1109\/TASLP.2022.3224305","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"key":"7150_CR41","doi-asserted-by":"publisher","unstructured":"Reddy CK, Gopal V, Cutler R (2021) Dnsmos: A non-intrusive perceptual objective speech quality metric to evaluate noise suppressors. In: ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 6493\u20136497. https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414878","DOI":"10.1109\/ICASSP39728.2021.9414878"},{"key":"7150_CR42","doi-asserted-by":"publisher","unstructured":"Saki F, Sehgal A, Panahi I, Kehtarnavaz N (2016) Smartphone-based real-time classification of noise signals using subband features and random forest classifier. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 2204\u20132208. https:\/\/doi.org\/10.1109\/ICASSP.2016.7472068","DOI":"10.1109\/ICASSP.2016.7472068"},{"key":"7150_CR43","doi-asserted-by":"publisher","unstructured":"Dean D, Sridharan S, Vogt R, Mason M (2010) The qut-noise-timit corpus for evaluation of voice activity detection algorithms. In: Proceedings of the 11th Annual Conference of the International Speech Communication Association, pp 3110\u20133113. https:\/\/doi.org\/10.21437\/Interspeech.2010-774. International Speech Communication Association","DOI":"10.21437\/Interspeech.2010-774"},{"key":"7150_CR44","doi-asserted-by":"publisher","unstructured":"Varga A, Steeneken HJ (1993) Assessment for automatic speech recognition: Ii. noisex-92: A database and an experiment to study the effect of additive noise on speech recognition systems. Speech communication 12(3):247\u2013251. https:\/\/doi.org\/10.1016\/0167-6393(93)90095-3","DOI":"10.1016\/0167-6393(93)90095-3"},{"issue":"8","key":"7150_CR45","doi-asserted-by":"publisher","first-page":"2067","DOI":"10.1109\/TASL.2010.2041110","volume":"18","author":"G Hu","year":"2010","unstructured":"Hu G, Wang D (2010) A tandem algorithm for pitch estimation and voiced speech segregation. IEEE Trans Audio Speech Lang Process 18(8):2067\u20132079","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"7150_CR46","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2022.109058","volume":"200","author":"M Wang","year":"2022","unstructured":"Wang M, Chen J, Zhang X, Huang Z, Rahardja S (2022) Multi-modal speech enhancement with bone-conducted speech in time domain. Appl Acoust 200:109058. https:\/\/doi.org\/10.1016\/j.apacoust.2022.109058","journal-title":"Appl Acoust"},{"key":"7150_CR47","doi-asserted-by":"publisher","unstructured":"Rix AW, Beerends JG, Hollier MP, Hekstra AP (2001) Perceptual evaluation of speech quality (pesq)-a new method for speech quality assessment of telephone networks and codecs. In: 2001 IEEE international conference on acoustics, speech, and signal processing. Proceedings (Cat. No. 01CH37221), vol 2, pp 749\u2013752. https:\/\/doi.org\/10.1109\/ICASSP.2001.941023. IEEE","DOI":"10.1109\/ICASSP.2001.941023"},{"issue":"7","key":"7150_CR48","doi-asserted-by":"publisher","first-page":"2125","DOI":"10.1109\/TASL.2011.2114881","volume":"19","author":"CH Taal","year":"2011","unstructured":"Taal CH, Hendriks RC, Heusdens R, Jensen J (2011) An algorithm for intelligibility prediction of time-frequency weighted noisy speech. IEEE Trans Audio Speech Lang Process 19(7):2125\u20132136. https:\/\/doi.org\/10.1109\/TASL.2011.2114881","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"11","key":"7150_CR49","doi-asserted-by":"publisher","first-page":"2009","DOI":"10.1109\/TASLP.2016.2585878","volume":"24","author":"J Jensen","year":"2016","unstructured":"Jensen J, Taal CH (2016) An algorithm for predicting the intelligibility of speech masked by modulated noise maskers. IEEE\/ACM Transactions on Audio Speech and Language Processing 24(11):2009\u20132022. https:\/\/doi.org\/10.1109\/TASLP.2016.2585878","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"key":"7150_CR50","unstructured":"Radford A, Kim JW, Xu T, Brockman G, McLeavey C, Sutskever I (2023) Robust speech recognition via large-scale weak supervision. In: International conference on machine learning, pp 28492\u201328518. PMLR"},{"key":"7150_CR51","doi-asserted-by":"publisher","unstructured":"Yao Z, Wu D, Wang X, Zhang B, Yu F, Yang C, Peng Z, Chen X, Xie L, Lei X (2021) Wenet: Production oriented streaming and non-streaming end-to-end speech recognition toolkit. In: Interspeech, vol 2021, pp 4054\u20134058. https:\/\/doi.org\/10.21437\/Interspeech.2021-1983","DOI":"10.21437\/Interspeech.2021-1983"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-026-07150-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-026-07150-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-026-07150-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T10:48:05Z","timestamp":1779792485000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-026-07150-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,18]]},"references-count":51,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["7150"],"URL":"https:\/\/doi.org\/10.1007\/s10489-026-07150-z","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,18]]},"assertion":[{"value":"9 November 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"165"}}