{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T09:57:14Z","timestamp":1774605434884,"version":"3.50.1"},"reference-count":110,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T00:00:00Z","timestamp":1757376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T00:00:00Z","timestamp":1757376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s00034-025-03269-5","type":"journal-article","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T18:29:14Z","timestamp":1757442554000},"page":"2342-2400","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Lightweight Multi-axial Transformer with MS-SENet and D3Net for Single Channel Speech Enhancement"],"prefix":"10.1007","volume":"45","author":[{"given":"Silpa","family":"Peethala","sequence":"first","affiliation":[]},{"given":"V.","family":"Sunnydayal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,9]]},"reference":[{"key":"3269_CR1","first-page":"2847","volume":"2","author":"T Abd El-Hafeez","year":"2010","unstructured":"T. Abd El-Hafeez, A new effective system for filtering pornography videos. Int. J. Comput. Sci. Eng. 2, 2847\u20132852 (2010)","journal-title":"Int. J. Comput. Sci. Eng."},{"key":"3269_CR2","unstructured":"F. Albu, N. Dumitriu, L.D. Stanciu, Speech enhancement by spectral subtraction, in Proceedings of the International Symposium on Electronics and Telecommunications (1996), pp. 78\u201383."},{"key":"3269_CR3","doi-asserted-by":"publisher","first-page":"146513","DOI":"10.1109\/ACCESS.2024.3397813","volume":"12","author":"MA Alohali","year":"2024","unstructured":"M.A. Alohali, N. Saleem, D. Rhouma, M. Medani, H. Elmannai, S. Bourouis, Temporally dynamic spiking transformer network for speech enhancement. IEEE Access 12, 146513\u2013146526 (2024). https:\/\/doi.org\/10.1109\/ACCESS.2024.3397813","journal-title":"IEEE Access"},{"issue":"1","key":"3269_CR4","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1186\/s40537-023-00734-w","volume":"10","author":"L Alzubaidi","year":"2023","unstructured":"L. Alzubaidi, O. Al-Shamma, M.A. Fadhel, J. Zhang, Y. Duan, A. Al-Dujaili, A survey on deep learning tools dealing with data scarcity: definitions, challenges, solutions, tips, and applications. J. Big Data 10(1), 46 (2023). https:\/\/doi.org\/10.1186\/s40537-023-00734-w","journal-title":"J. Big Data"},{"issue":"1","key":"3269_CR5","doi-asserted-by":"publisher","first-page":"226","DOI":"10.3390\/su14010226","volume":"14","author":"A Badawy","year":"2021","unstructured":"A. Badawy, J.A. Fisteus, T.M. Mahmoud, T. Abd El-Hafeez, Topic extraction and interactive knowledge graphs for learning resources. Sustainability 14(1), 226 (2021). https:\/\/doi.org\/10.3390\/su14010226","journal-title":"Sustainability"},{"key":"3269_CR6","doi-asserted-by":"publisher","first-page":"670","DOI":"10.1109\/TASLP.2022.3146383","volume":"31","author":"M Barhoush","year":"2022","unstructured":"M. Barhoush, A. Hallawa, A. Peine, L. Martin, A. Schmeink, Localization-driven speech enhancement in noisy multi-speaker hospital environments using deep learning and meta learning. IEEE ACM Trans. Audio Speech Lang. Process. 31, 670\u2013683 (2022). https:\/\/doi.org\/10.1109\/TASLP.2022.3146383","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"issue":"4","key":"3269_CR7","doi-asserted-by":"publisher","first-page":"2341","DOI":"10.1007\/s00034-023-02592-9","volume":"43","author":"K Bhangale","year":"2024","unstructured":"K. Bhangale, M. Kothandaraman, Speech emotion recognition using generative adversarial network and deep convolutional neural network. Circuits Syst. Signal Process. 43(4), 2341\u20132384 (2024). https:\/\/doi.org\/10.1007\/s00034-023-02592-9","journal-title":"Circuits Syst. Signal Process."},{"key":"3269_CR8","doi-asserted-by":"publisher","unstructured":"A. Bishnu, A.K. Pandey, R. Singh, N. Mittal, P.K. Singh, Live demonstration: cloud-based audio-visual speech enhancement in multimodal hearing-aids, in IEEE International Symposium on Circuits and Systems (ISCAS) (2023). https:\/\/doi.org\/10.1109\/ISCAS46773.2023.10179716","DOI":"10.1109\/ISCAS46773.2023.10179716"},{"key":"3269_CR9","doi-asserted-by":"publisher","first-page":"2418","DOI":"10.1109\/TASLP.2024.3382635","volume":"32","author":"BJ Borgstr\u00f6m","year":"2024","unstructured":"B.J. Borgstr\u00f6m, M.S. Brandstein, A multiscale autoencoder (MSAE) framework for end-to-end neural network speech enhancement. IEEE ACM Trans. Audio Speech Lang. Process. 32, 2418\u20132431 (2024). https:\/\/doi.org\/10.1109\/TASLP.2024.3382635","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR10","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1109\/TASLP.2020.3048706","volume":"29","author":"BJ Borgstr\u00f6m","year":"2020","unstructured":"B.J. Borgstr\u00f6m, M.S. Brandstein, Speech enhancement via attention masking network (SEAMNET): an end-to-end system for joint suppression of noise and reverberation. IEEE ACM Trans. Audio Speech Lang. Process. 29, 515\u2013526 (2020). https:\/\/doi.org\/10.1109\/TASLP.2020.3048706","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR11","doi-asserted-by":"publisher","unstructured":"A. Briegleb, W. Kellermann, Spatially constrained vs. unconstrained filtering in neural spatiospectral filters for multichannel speech enhancement, in IEEE 32nd European Signal Processing Conference (EUSIPCO) (2024), pp. 102\u2013106. https:\/\/doi.org\/10.23919\/EUSIPCO58844.2024.10520603","DOI":"10.23919\/EUSIPCO58844.2024.10520603"},{"issue":"3","key":"3269_CR12","doi-asserted-by":"publisher","first-page":"1418","DOI":"10.1109\/TNNLS.2021.3050004","volume":"34","author":"D Chen","year":"2021","unstructured":"D. Chen, X. Li, S. Li, A novel convolutional neural network model based on beetle antennae search optimization algorithm for computerized tomography diagnosis. IEEE Trans. Neural Netw. Learn. Syst. 34(3), 1418\u20131429 (2021). https:\/\/doi.org\/10.1109\/TNNLS.2021.3050004","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"3269_CR13","doi-asserted-by":"crossref","unstructured":"X. Chen, H. Bi, W.T. Lai, F. Ma, Monaural speech enhancement on drone via adapter-based transfer learning, in IEEE 18th International Workshop on Acoustic Signal Enhancement (IWAENC) (2024), pp. 85\u201389","DOI":"10.1109\/IWAENC61483.2024.10694014"},{"key":"3269_CR14","unstructured":"CHiME Challenge. CHiME5 Dataset (n.d.). Retrieved from https:\/\/www.chimechallenge.org\/datasets\/chime5"},{"key":"3269_CR15","doi-asserted-by":"crossref","unstructured":"A. Chinaev, T. Spitz, S. Thaleiser, G. Enzner, Matrix study of feature compression types and instrumental speech quality metrics in ultra-light DNN-based spectral speech enhancement, in IEEE 18th International Workshop on Acoustic Signal Enhancement (IWAENC) (2024), pp. 11\u201315.","DOI":"10.1109\/IWAENC61483.2024.10694242"},{"key":"3269_CR16","doi-asserted-by":"crossref","unstructured":"G. Close, T. Hain, S. Goetze, Hallucination in perceptual metric-driven speech enhancement networks, in IEEE 32nd European Signal Processing Conference (EUSIPCO) (2024), pp. 21\u201325","DOI":"10.23919\/EUSIPCO63174.2024.10714927"},{"key":"3269_CR17","unstructured":"Common Voice English. https:\/\/www.openslr.org\/12"},{"issue":"11","key":"3269_CR18","doi-asserted-by":"publisher","first-page":"1515","DOI":"10.1016\/j.specom.2006.05.002","volume":"48","author":"TH Dat","year":"2006","unstructured":"T.H. Dat, K. Takeda, F. Itakura, On-line Gaussian mixture modeling in the log-power domain for signal-to-noise ratio estimation and speech enhancement. Speech Commun. 48(11), 1515\u20131527 (2006). https:\/\/doi.org\/10.1016\/j.specom.2006.05.002","journal-title":"Speech Commun."},{"key":"3269_CR19","doi-asserted-by":"publisher","unstructured":"H. Fang, T. Gerkmann, Uncertainty estimation in deep speech enhancement using complex Gaussian mixture models, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096056","DOI":"10.1109\/ICASSP49357.2023.10096056"},{"key":"3269_CR20","doi-asserted-by":"publisher","first-page":"1587","DOI":"10.1109\/TASLP.2023.3281802","volume":"31","author":"H Fang","year":"2023","unstructured":"H. Fang, D. Becker, S. Wermter, T. Gerkmann, Integrating uncertainty into neural network-based speech enhancement. IEEE ACM Trans. Audio Speech Lang. Process. 31, 1587\u20131600 (2023). https:\/\/doi.org\/10.1109\/TASLP.2023.3281802","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"issue":"3","key":"3269_CR21","first-page":"39","volume":"164","author":"HM Farghaly","year":"2020","unstructured":"H.M. Farghaly, A.A. Ali, T. Abd El-Hafeez, Building an effective and accurate associative classifier based on support vector machine. Sylwan 164(3), 39\u201356 (2020)","journal-title":"Sylwan"},{"key":"3269_CR22","doi-asserted-by":"publisher","unstructured":"H.M. Farghaly, A.A. Ali, T. Abd El-Hafeez, Developing an efficient method for automatic threshold detection based on hybrid feature selection approach, in Artificial Intelligence and Bioinspired Computational Methods: Proceedings of the 9th Computer Science On-Line Conference, ed. J. Holub, R. \u0160perka, vol. 2 (Springer, 2020), pp. 56\u201372. https:\/\/doi.org\/10.1007\/978-3-030-61702-0_5","DOI":"10.1007\/978-3-030-61702-0_5"},{"key":"3269_CR23","doi-asserted-by":"publisher","first-page":"123456","DOI":"10.1109\/ACCESS.2024.3398401","volume":"12","author":"LV Fiorio","year":"2024","unstructured":"L.V. Fiorio, B. Karanov, B. Defraene, J. David, F. Widdershoven, W. Van Houtum, R.M. Aarts, Spectral masking with explicit time-context windowing for neural network-based monaural speech enhancement. IEEE Access 12, 123456 (2024). https:\/\/doi.org\/10.1109\/ACCESS.2024.3398401","journal-title":"IEEE Access"},{"key":"3269_CR24","doi-asserted-by":"publisher","first-page":"126513","DOI":"10.1109\/ACCESS.2024.3413740","volume":"12","author":"LV Fiorio","year":"2024","unstructured":"L.V. Fiorio, B. Karanov, B. Defraene, J. David, F. Widdershoven, W. Van Houtum, R.M. Aarts, Spectral masking with explicit time-context windowing for neural network-based monaural speech enhancement. IEEE Access 12, 126513\u2013126526 (2024). https:\/\/doi.org\/10.1109\/ACCESS.2024.3413740","journal-title":"IEEE Access"},{"key":"3269_CR25","doi-asserted-by":"publisher","unstructured":"T. Fujimura, T. Toda, Analysis of noisy-target training for DNN-based speech enhancement, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10095292","DOI":"10.1109\/ICASSP49357.2023.10095292"},{"key":"3269_CR26","doi-asserted-by":"publisher","unstructured":"C. Haruta, N. Ono, Y. Kinoshita, Framewise finite impulse response filtering based on time-frequency mask for low-latency speech enhancement, in IEEE Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC) (2021). https:\/\/doi.org\/10.1109\/APSIPAASC52831.2021.9689876","DOI":"10.1109\/APSIPAASC52831.2021.9689876"},{"issue":"1","key":"3269_CR27","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1186\/s40537-024-00985-w","volume":"11","author":"E Hassan","year":"2024","unstructured":"E. Hassan, S. Elbedwehy, M.Y. Shams, T. Abd El-Hafeez, N. El-Rashidy, Optimizing poultry audio signal classification with deep learning and burn layer fusion. J. Big Data 11(1), 135 (2024). https:\/\/doi.org\/10.1186\/s40537-024-00985-w","journal-title":"J. Big Data"},{"key":"3269_CR28","doi-asserted-by":"publisher","unstructured":"B. Irvin, M. Stamenovic, M. Kegler, L.C. Yang, Self-supervised learning for speech enhancement through synthesis, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10095367","DOI":"10.1109\/ICASSP49357.2023.10095367"},{"key":"3269_CR29","doi-asserted-by":"publisher","unstructured":"S.A. Jerjees, H.J. Mohammed, H.S. Radeaf, B.M. Mahmmod, S.H. Abdulhussain, Deep learning-based speech enhancement algorithm using Charlier transform, in IEEE 15th International Conference on Developments in eSystems Engineering (DeSE) (2023), pp. 100\u2013105. https:\/\/doi.org\/10.1109\/DeSE60156.2023.10135888","DOI":"10.1109\/DeSE60156.2023.10135888"},{"key":"3269_CR30","doi-asserted-by":"publisher","first-page":"1758","DOI":"10.1109\/TASLP.2023.3284509","volume":"31","author":"W Jiang","year":"2023","unstructured":"W. Jiang, K. Yu, Speech enhancement with integration of neural homomorphic synthesis and spectral masking. IEEE ACM Trans. Audio Speech Lang. Process. 31, 1758\u20131770 (2023). https:\/\/doi.org\/10.1109\/TASLP.2023.3284509","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"issue":"13","key":"3269_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.5120\/16894-7017","volume":"96","author":"N Kaladharan","year":"2014","unstructured":"N. Kaladharan, Speech enhancement by spectral subtraction method. Int. J. Comput. Appl. 96(13), 1\u20135 (2014). https:\/\/doi.org\/10.5120\/16894-7017","journal-title":"Int. J. Comput. Appl."},{"issue":"2","key":"3269_CR32","doi-asserted-by":"publisher","first-page":"122204","DOI":"10.1007\/s11432-020-3263-0","volume":"65","author":"AT Khan","year":"2022","unstructured":"A.T. Khan, S. Li, X. Cao, Human guided cooperative robotic agents in smart home using beetle antennae search. Sci. China Inf. Sci. 65(2), 122204 (2022). https:\/\/doi.org\/10.1007\/s11432-020-3263-0","journal-title":"Sci. China Inf. Sci."},{"key":"3269_CR33","doi-asserted-by":"publisher","unstructured":"D.P. Kingma, J. Ba, Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014). https:\/\/doi.org\/10.48550\/arXiv.1412.6980","DOI":"10.48550\/arXiv.1412.6980"},{"issue":"02","key":"3269_CR34","doi-asserted-by":"publisher","first-page":"2150017","DOI":"10.1142\/S0219477521500173","volume":"20","author":"B Kumar","year":"2021","unstructured":"B. Kumar, Comparative performance evaluation of greedy algorithms for speech enhancement system. Fluct. Noise Lett. 20(02), 2150017 (2021). https:\/\/doi.org\/10.1142\/S0219477521500173","journal-title":"Fluct. Noise Lett."},{"key":"3269_CR35","doi-asserted-by":"publisher","unstructured":"S. Kumar, K. Kumar, IRSC: integrated automated review mining system using virtual machines in cloud environment, in IEEE Conference on Information and Communication Technology (CICT) (2018), pp. 1\u20136. https:\/\/doi.org\/10.1109\/INFOCOMTECH.2018.8722441","DOI":"10.1109\/INFOCOMTECH.2018.8722441"},{"key":"3269_CR36","doi-asserted-by":"publisher","first-page":"101685","DOI":"10.1016\/j.csl.2024.101685","volume":"89","author":"S Leglaive","year":"2025","unstructured":"S. Leglaive, M. Fraticelli, H. ElGhazaly, L. Borne, M. Sadeghi, S. Wisdom, J.P. Barker, Objective and subjective evaluation of speech enhancement methods in the UDASE task of the 7th CHiME challenge. Comput. Speech Lang. 89, 101685 (2025). https:\/\/doi.org\/10.1016\/j.csl.2024.101685","journal-title":"Comput. Speech Lang."},{"issue":"7","key":"3269_CR37","doi-asserted-by":"publisher","first-page":"1233","DOI":"10.1109\/JAS.2022.105448","volume":"9","author":"Y Lei","year":"2022","unstructured":"Y. Lei, H. Zhu, J. Zhang, H. Shan, Meta ordinal regression forest for medical image classification with ordinal labels. IEEE CAA J. Autom. Sin. 9(7), 1233\u20131247 (2022). https:\/\/doi.org\/10.1109\/JAS.2022.105448","journal-title":"IEEE CAA J. Autom. Sin."},{"key":"3269_CR38","doi-asserted-by":"publisher","unstructured":"M. Li, Y. Zheng, D. Li, Y. Wu, Y. Wang, H Fei, MS-SENet: enhancing speech emotion recognition through multi-scale feature fusion with squeeze-and-excitation blocks, in Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 12271\u201312275. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10447232","DOI":"10.1109\/ICASSP48485.2024.10447232"},{"key":"3269_CR39","doi-asserted-by":"publisher","first-page":"238","DOI":"10.1016\/j.neucom.2021.03.063","volume":"448","author":"S Li","year":"2021","unstructured":"S. Li, X. Xing, W. Fan, B. Cai, P. Fordson, X. Xu, Spatiotemporal and frequential cascaded attention networks for speech emotion recognition. Neurocomputing 448, 238\u2013248 (2021). https:\/\/doi.org\/10.1016\/j.neucom.2021.03.063","journal-title":"Neurocomputing"},{"issue":"11","key":"3269_CR40","doi-asserted-by":"publisher","first-page":"8778","DOI":"10.1109\/TNNLS.2022.3157650","volume":"34","author":"Z Li","year":"2022","unstructured":"Z. Li, S. Li, O.O. Bamasag, A. Alhothali, X. Luo, Diversified regularization enhanced training for effective manipulator calibration. IEEE Trans. Neural Netw. Learn. Syst. 34(11), 8778\u20138790 (2022). https:\/\/doi.org\/10.1109\/TNNLS.2022.3157650","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"3269_CR41","doi-asserted-by":"publisher","unstructured":"X. Liang, Z. Zhang, M. Wang, R. Xu, Lightweight multi-axial transformer with frequency prompt for single channel speech enhancement, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 10511\u201310515. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10454738","DOI":"10.1109\/ICASSP48485.2024.10454738"},{"key":"3269_CR42","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3398054","author":"CY Lim","year":"2024","unstructured":"C.Y. Lim, J. Park, K. Kim, H. Ko, Noise-aware extended U-Net with split encoder and feature refinement module for robust speaker verification in noisy environments. IEEE Access (2024). https:\/\/doi.org\/10.1109\/ACCESS.2024.3398054","journal-title":"IEEE Access"},{"issue":"3","key":"3269_CR43","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1109\/TASSP.1978.1163127","volume":"26","author":"JS Lim","year":"1978","unstructured":"J.S. Lim, A.V. Oppenheim, All-pole modeling of degraded speech. IEEE Trans. Acoust. Speech Signal Process. 26(3), 197\u2013210 (1978). https:\/\/doi.org\/10.1109\/TASSP.1978.1163127","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"3269_CR44","doi-asserted-by":"publisher","unstructured":"X. Lin, X. Bie, S. Leglaive, L. Girin, X. Alameda-Pineda, Speech modeling with a hierarchical transformer dynamical VAE, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096025","DOI":"10.1109\/ICASSP49357.2023.10096025"},{"key":"3269_CR45","doi-asserted-by":"publisher","unstructured":"J. Liu, X. Zhang, ICCRN: inplace cepstral convolutional recurrent neural network for monaural speech enhancement, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023). https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096112","DOI":"10.1109\/ICASSP49357.2023.10096112"},{"issue":"11","key":"3269_CR46","doi-asserted-by":"publisher","first-page":"5931","DOI":"10.1109\/TII.2019.2906866","volume":"15","author":"H Lu","year":"2019","unstructured":"H. Lu, L. Jin, X. Luo, B. Liao, D. Guo, L. Xiao, RNN for solving perturbed time-varying underdetermined linear system with double bound limits on residual errors and state variables. IEEE Trans. Ind. Inform. 15(11), 5931\u20135942 (2019). https:\/\/doi.org\/10.1109\/TII.2019.2906866","journal-title":"IEEE Trans. Ind. Inform."},{"issue":"8","key":"3269_CR47","first-page":"698","volume":"5","author":"TM Mahmoud","year":"2013","unstructured":"T.M. Mahmoud, T. Abd-El-Hafeez, A. Badawy, A framework for an E-learning system based on semantic web. Int. J. Comput. Sci. Eng. 5(8), 698 (2013)","journal-title":"Int. J. Comput. Sci. Eng."},{"issue":"12","key":"3269_CR48","doi-asserted-by":"publisher","first-page":"2162","DOI":"10.1109\/TASLP.2015.2467173","volume":"23","author":"D Marquardt","year":"2015","unstructured":"D. Marquardt, V. Hohmann, S. Doclo, Interaural coherence preservation in multi-channel Wiener filtering-based noise reduction for binaural hearing aids. IEEE ACM Trans. Audio Speech Lang. Process. 23(12), 2162\u20132176 (2015). https:\/\/doi.org\/10.1109\/TASLP.2015.2467173","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR49","doi-asserted-by":"publisher","unstructured":"F. Mathieu, T. Courtat, G. Richard, G. Peeters, Learning interpretable filters in wav-UNet for speech enhancement, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096054","DOI":"10.1109\/ICASSP49357.2023.10096054"},{"issue":"1","key":"3269_CR50","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1186\/s40537-024-00951-6","volume":"11","author":"G Mostafa","year":"2024","unstructured":"G. Mostafa, H. Mahmoud, T. Abd El-Hafeez, M.E. ElAraby, Feature reduction for hepatocellular carcinoma prediction using machine learning algorithms. J. Big Data 11(1), 88 (2024). https:\/\/doi.org\/10.1186\/s40537-024-00951-6","journal-title":"J. Big Data"},{"key":"3269_CR51","unstructured":"Mozilla. CommonVoice dataset. https:\/\/commonvoice.mozilla.org\/en"},{"key":"3269_CR52","unstructured":"Multilingual LibriSpeech (MLS). https:\/\/www.openslr.org\/94"},{"key":"3269_CR53","doi-asserted-by":"publisher","unstructured":"D.C. Naik, A.S. Murthy, R. Nuthakki, Modified magnitude spectral subtraction methods for speech enhancement, in IEEE International Conference on Electrical, Electronics, Communication, Computer, and Optimization Techniques (ICEECCOT) (2017), pp. 274\u2013279. https:\/\/doi.org\/10.1109\/ICEECCOT.2017.8284681","DOI":"10.1109\/ICEECCOT.2017.8284681"},{"key":"3269_CR54","doi-asserted-by":"publisher","unstructured":"S. Nakaoka, T. Toda, N. Hiraoka, A. Nakamura, Teacher-student learning for low-latency online speech enhancement using wave-unet, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414463","DOI":"10.1109\/ICASSP39728.2021.9414463"},{"key":"3269_CR55","doi-asserted-by":"publisher","unstructured":"A. Negi, K. Kumar, N.S. Chaudhari, N. Singh, P. Chauhan, Predictive analytics for recognizing human activities using residual network and fine-tuning, in Big Data Analytics: 9th International Conference, BDA Proceedings, ed. S. Misra et al. (Springer, 2021), pp. 296\u2013310. https:\/\/doi.org\/10.1007\/978-3-030-93736-4_22","DOI":"10.1007\/978-3-030-93736-4_22"},{"key":"3269_CR56","doi-asserted-by":"publisher","first-page":"120746","DOI":"10.1016\/j.eswa.2023.120746","volume":"232","author":"R Nisa","year":"2023","unstructured":"R. Nisa, H. Showkat, A. Baba, The speech signal enhancement approach with multiple sub-frames analysis for complex magnitude and phase spectrum recompense. Expert Syst. Appl. 232, 120746 (2023). https:\/\/doi.org\/10.1016\/j.eswa.2023.120746","journal-title":"Expert Syst. Appl."},{"key":"3269_CR57","doi-asserted-by":"crossref","unstructured":"Y.C. Pan, Y.L. Shen, Y.F. Liao, T.S. Chi, Band-Split Inter-SubNet: band-split with subband interaction for monaural speech enhancement, in IEEE Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC) (2024), pp. 1\u20136","DOI":"10.1109\/APSIPAASC63619.2025.10849208"},{"issue":"7","key":"3269_CR58","doi-asserted-by":"publisher","first-page":"1179","DOI":"10.1109\/TASLP.2019.2915160","volume":"27","author":"A Pandey","year":"2019","unstructured":"A. Pandey, D. Wang, A new framework for CNN-based speech enhancement in the time domain. IEEE ACM Trans. Audio Speech Lang. Process. 27(7), 1179\u20131188 (2019). https:\/\/doi.org\/10.1109\/TASLP.2019.2915160","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR59","doi-asserted-by":"publisher","first-page":"1360","DOI":"10.1109\/TASLP.2023.3256301","volume":"31","author":"A Pandey","year":"2023","unstructured":"A. Pandey, D. Wang, Attentive training: a new training framework for speech enhancement. IEEE ACM Trans. Audio Speech Lang. Process. 31, 1360\u20131370 (2023). https:\/\/doi.org\/10.1109\/TASLP.2023.3256301","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR60","doi-asserted-by":"publisher","unstructured":"A. Pandey, D. Wang, TCNN: temporal convolutional neural network for real-time speech enhancement in the time domain, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2019), pp. 6875\u20136879. https:\/\/doi.org\/10.1109\/ICASSP.2019.8682466","DOI":"10.1109\/ICASSP.2019.8682466"},{"key":"3269_CR61","doi-asserted-by":"publisher","unstructured":"V. Pratap, Q. Xu, A. Sriram, G. Synnaeve, R. Collobert, MLS: a large-scale multilingual dataset for speech research. arXiv preprint arXiv:2012.03411 (2020). https:\/\/doi.org\/10.48550\/arXiv.2012.03411","DOI":"10.48550\/arXiv.2012.03411"},{"key":"3269_CR62","doi-asserted-by":"publisher","first-page":"107404","DOI":"10.1016\/j.patcog.2020.107404","volume":"106","author":"X Qin","year":"2020","unstructured":"X. Qin, Z. Zhang, C. Huang, M. Dehghan, O.R. Zaiane, M. Jagersand, U2-net: going deeper with nested U-structure for salient object detection. Pattern Recogn. 106, 107404 (2020). https:\/\/doi.org\/10.1016\/j.patcog.2020.107404","journal-title":"Pattern Recogn."},{"issue":"4","key":"3269_CR63","doi-asserted-by":"publisher","first-page":"2236","DOI":"10.1121\/1.1603233","volume":"114","author":"N Roman","year":"2003","unstructured":"N. Roman, D. Wang, G.J. Brown, Speech segregation based on sound localization. J. Acoust. Soc. Am. 114(4), 2236\u20132252 (2003). https:\/\/doi.org\/10.1121\/1.1603233","journal-title":"J. Acoust. Soc. Am."},{"key":"3269_CR64","doi-asserted-by":"publisher","first-page":"104408","DOI":"10.1016\/j.dsp.2023.104408","volume":"147","author":"N Saleem","year":"2024","unstructured":"N. Saleem, T.S. Gunawan, S. Dhahbi, S. Bourouis, Time domain speech enhancement with CNN and time-attention transformer. Digit. Signal Process. 147, 104408 (2024). https:\/\/doi.org\/10.1016\/j.dsp.2023.104408","journal-title":"Digit. Signal Process."},{"key":"3269_CR65","doi-asserted-by":"publisher","unstructured":"P. Sandhya, R. Bandi, D.D. Himabindu, Stock price prediction using recurrent neural network and LSTM, in IEEE 6th International Conference on Computing Methodologies and Communication (ICCMC) (2022), pp. 1723\u20131728. https:\/\/doi.org\/10.1109\/ICCMC53470.2022.9753872","DOI":"10.1109\/ICCMC53470.2022.9753872"},{"key":"3269_CR66","doi-asserted-by":"publisher","first-page":"123608","DOI":"10.1016\/j.eswa.2023.123608","volume":"249","author":"MY Shams","year":"2024","unstructured":"M.Y. Shams, T. Abd El-Hafeez, & Hassan, E Acoustic data detection in large-scale emergency vehicle sirens and road noise dataset. Expert Syst. Appl. 249, 123608 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2023.123608","journal-title":"Expert Syst. Appl."},{"key":"3269_CR67","doi-asserted-by":"publisher","first-page":"26319","DOI":"10.1007\/s11042-021-11141-5","volume":"80","author":"S Sharma","year":"2021","unstructured":"S. Sharma, K. Kumar, ASL-3DCNN: American sign language recognition technique using 3-D convolutional neural networks. Multimed. Tools Appl. 80, 26319\u201326331 (2021). https:\/\/doi.org\/10.1007\/s11042-021-11141-5","journal-title":"Multimed. Tools Appl."},{"key":"3269_CR68","doi-asserted-by":"publisher","unstructured":"Y. Song, D. Kim, H.G. Kang, N. Madhu, Spectrum-aware neural vocoder based on self-supervised learning for speech enhancement, in IEEE 32nd European Signal Processing Conference (EUSIPCO) (2024), pp. 16\u201320. https:\/\/doi.org\/10.23919\/EUSIPCO58844.2024.10520501","DOI":"10.23919\/EUSIPCO58844.2024.10520501"},{"issue":"5","key":"3269_CR69","doi-asserted-by":"publisher","first-page":"1773","DOI":"10.1007\/s11554-021-01117-1","volume":"18","author":"PN Srinivasu","year":"2021","unstructured":"P.N. Srinivasu, A.K. Bhoi, R.H. Jhaveri, G.T. Reddy, M. Bilal, Probabilistic deep Q network for real-time path planning in censorious robotic procedures using force sensors. J. Real Time Image Proc. 18(5), 1773\u20131785 (2021). https:\/\/doi.org\/10.1007\/s11554-021-01117-1","journal-title":"J. Real Time Image Proc."},{"key":"3269_CR70","doi-asserted-by":"publisher","unstructured":"N. Takahashi, Y. Mitsufuji, D3Net: densely connected multidilated DenseNet for music source separation. arXiv preprint https:\/\/arxiv.org\/abs\/2010.01733 (2020). https:\/\/doi.org\/10.48550\/arXiv.2010.01733","DOI":"10.48550\/arXiv.2010.01733"},{"key":"3269_CR71","doi-asserted-by":"publisher","first-page":"3237","DOI":"10.1109\/TASLP.2023.3324371","volume":"31","author":"M Tammen","year":"2023","unstructured":"M. Tammen, S. Doclo, Parameter estimation procedures for deep multi-frame MVDR filtering for single-microphone speech enhancement. IEEE ACM Trans. Audio Speech Lang. Process. 31, 3237\u20133248 (2023). https:\/\/doi.org\/10.1109\/TASLP.2023.3324371","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR72","doi-asserted-by":"publisher","unstructured":"K. Tan, D. Wang, A convolutional recurrent neural network for real-time speech enhancement, in Proceedings of Interspeech (2018), pp. 3229\u20133233. https:\/\/doi.org\/10.21437\/Interspeech.2018-1623","DOI":"10.21437\/Interspeech.2018-1623"},{"key":"3269_CR73","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/TASLP.2019.2945365","volume":"28","author":"K Tan","year":"2019","unstructured":"K. Tan, D. Wang, Learning complex spectral mapping with gated convolutional recurrent networks for monaural speech enhancement. IEEE ACM Trans. Audio Speech Lang. Process. 28, 380\u2013390 (2019). https:\/\/doi.org\/10.1109\/TASLP.2019.2945365","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR74","doi-asserted-by":"crossref","unstructured":"S. Tao, A. Krueger, H. Barfuss, F. Appelt, C. Deng, W. Kellermann, W, Learning-based multi-channel speech presence probability estimation using a low-parameter model and integration with MVDR beamforming for multi-channel speech enhancement, in IEEE 18th International Workshop on Acoustic Signal Enhancement (IWAENC) (2024), pp. 41\u201345","DOI":"10.1109\/IWAENC61483.2024.10694149"},{"key":"3269_CR75","unstructured":"L. Thieling, L. Nippert, P. Jax, Using perceptual evaluation of speech quality (PESQ) loss for DNN-based speech enhancement, in Speech Communication; 15th ITG Conference. VDE (2023), pp. 61\u201365. https:\/\/ieeexplore.ieee.org\/document\/10323759"},{"key":"3269_CR76","doi-asserted-by":"crossref","unstructured":"V. Tokala, E. Grinstein, M. Brookes, S. Doclo, J. Jensen, P.A. Naylor, Binaural speech enhancement using deep complex convolutional transformer networks, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 681\u2013685","DOI":"10.1109\/ICASSP48485.2024.10447090"},{"key":"3269_CR77","doi-asserted-by":"publisher","unstructured":"A. van den Oord, S. Dieleman, H. Zen, K. Simonyan, O. Vinyals, A. Graves, K. Kavukcuoglu, Wavenet: a generative model for raw audio (2016). https:\/\/doi.org\/10.48550\/arXiv.1609.03499","DOI":"10.48550\/arXiv.1609.03499"},{"key":"3269_CR78","doi-asserted-by":"crossref","unstructured":"A. Varga, H.J.M. Steeneken, NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems (1993). http:\/\/svr-www.eng.cam.ac.uk\/comp.speech\/Section1\/Data\/noisex.html","DOI":"10.1016\/0167-6393(93)90095-3"},{"key":"3269_CR79","doi-asserted-by":"publisher","unstructured":"A. Vijayvergia, K. Kumar, Star: rating of reviews by exploiting variation in emotions using transfer learning framework, in IEEE Conference on Information and Communication Technology (CICT) (2018) pp. 1\u20136. .https:\/\/doi.org\/10.1109\/INFOCOMTECH.2018.8722437","DOI":"10.1109\/INFOCOMTECH.2018.8722437"},{"key":"3269_CR80","doi-asserted-by":"publisher","DOI":"10.1109\/9780470043387","volume-title":"Computational auditory scene analysis: principles, algorithms, and applications","author":"D Wang","year":"2006","unstructured":"D. Wang, G.J. Brown, Computational auditory scene analysis: principles, algorithms, and applications (Wiley-IEEE Press, Hoboken, 2006)"},{"key":"3269_CR81","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2022.3140987","volume":"71","author":"H Wang","year":"2022","unstructured":"H. Wang, T. Lin, L. Cui, B. Ma, Z. Dong, L. Song, Multitask learning-based self-attention encoding atrous convolutional neural network for remaining useful life prediction. IEEE Trans. Instrum. Meas. 71, 1\u20138 (2022). https:\/\/doi.org\/10.1109\/TIM.2022.3140987","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"3269_CR82","doi-asserted-by":"crossref","unstructured":"K.C. Wang, Y.J. Li, W.L. Chen, Y.W. Chen, Y.C. Wang, P.C. Yeh, C. Zhang, Y. Tsao, Bridging the gap: Integrating pre-trained speech enhancement and recognition models for robust speech recognition, in IEEE 32nd European Signal Processing Conference (EUSIPCO) (2024), pp. 426\u2013430.","DOI":"10.23919\/EUSIPCO63174.2024.10715447"},{"key":"3269_CR83","doi-asserted-by":"publisher","unstructured":"K. Wang, D. Hatzinakos, SEformer: dual-path conformer neural network is a good speech denoiser, in IEEE Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC) (2023), pp. 934\u2013940. https:\/\/doi.org\/10.1109\/APSIPAASC58190.2023.10315309","DOI":"10.1109\/APSIPAASC58190.2023.10315309"},{"key":"3269_CR84","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/LGRS.2022.3173430","volume":"19","author":"W Wang","year":"2022","unstructured":"W. Wang, C. Tang, X. Wang, B. Zheng, A ViT-based multiscale feature fusion approach for remote sensing image segmentation. IEEE Geosci. Remote Sens. Lett. 19, 1\u20135 (2022). https:\/\/doi.org\/10.1109\/LGRS.2022.3173430","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"issue":"12","key":"3269_CR85","doi-asserted-by":"publisher","first-page":"1849","DOI":"10.1109\/TASLP.2014.2341273","volume":"22","author":"Y Wang","year":"2014","unstructured":"Y. Wang, A. Narayanan, D. Wang, On training targets for supervised speech separation. IEEE ACM Trans. Audio Speech Lang. Process. 22(12), 1849\u20131858 (2014). https:\/\/doi.org\/10.1109\/TASLP.2014.2341273","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR86","doi-asserted-by":"publisher","first-page":"397","DOI":"10.1109\/TASLP.2022.3216523","volume":"31","author":"ZQ Wang","year":"2022","unstructured":"Z.Q. Wang, G. Wichern, S. Watanabe, J. Le Roux, STFT-domain neural speech enhancement with very low algorithmic latency. IEEE ACM Trans. Audio Speech Lang. Process. 31, 397\u2013410 (2022). https:\/\/doi.org\/10.1109\/TASLP.2022.3216523","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR87","doi-asserted-by":"publisher","first-page":"108812","DOI":"10.1016\/j.sigpro.2023.108812","volume":"204","author":"P Wen","year":"2023","unstructured":"P. Wen, B. Wang, S. Zhang, B. Qu, X. Song, J. Sun, X. Mu, Bias-compensated augmented complex-valued NSAF algorithm and its low-complexity implementation. Signal Process. 204, 108812 (2023). https:\/\/doi.org\/10.1016\/j.sigpro.2023.108812","journal-title":"Signal Process."},{"key":"3269_CR88","doi-asserted-by":"publisher","unstructured":"T. Wu, S. He, H. Zhang, X. Zhang, ScaleFormer: transformer-based speech enhancement in the multi-scale time domain, in IEEE Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC) (2023), pp. 2448\u20132453. https:\/\/doi.org\/10.1109\/APSIPAASC58190.2023.10316174","DOI":"10.1109\/APSIPAASC58190.2023.10316174"},{"issue":"1","key":"3269_CR89","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1109\/JSTSP.2020.3040160","volume":"15","author":"Y Xian","year":"2020","unstructured":"Y. Xian, Y. Sun, W. Wang, S.M. Naqvi, A multi-scale feature recalibration network for end-to-end single channel speech enhancement. IEEE J. Sel. Top. Signal Process. 15(1), 143\u2013155 (2020). https:\/\/doi.org\/10.1109\/JSTSP.2020.3040160","journal-title":"IEEE J. Sel. Top. Signal Process."},{"key":"3269_CR90","doi-asserted-by":"publisher","first-page":"1455","DOI":"10.1109\/LSP.2021.3083731","volume":"28","author":"X Xiang","year":"2021","unstructured":"X. Xiang, X. Zhang, H. Chen, A convolutional network with multi-scale and attention mechanisms for end-to-end single-channel speech enhancement. IEEE Signal Process. Lett. 28, 1455\u20131459 (2021). https:\/\/doi.org\/10.1109\/LSP.2021.3083731","journal-title":"IEEE Signal Process. Lett."},{"key":"3269_CR91","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1109\/TASLP.2023.3234404","volume":"32","author":"Y Xiang","year":"2023","unstructured":"Y. Xiang, J.L. H\u00f8jvang, M.H. Rasmussen, M.G. Christensen, A two-stage deep representation learning-based speech enhancement method using variational autoencoder and adversarial training. IEEE ACM Trans. Audio Speech Lang. Process. 32, 164\u2013177 (2023). https:\/\/doi.org\/10.1109\/TASLP.2023.3234404","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR92","doi-asserted-by":"publisher","unstructured":"Y. Xiang, J. Tian, X. Hu, X. Xu, Z. Yin, A deep representation learning-based speech enhancement method using complex convolution recurrent variational autoencoder, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 781\u2013785. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10453976","DOI":"10.1109\/ICASSP48485.2024.10453976"},{"key":"3269_CR93","first-page":"9633","volume":"33","author":"R Xu","year":"2020","unstructured":"R. Xu, J. Zhang, Y. Luo, B. Li, Listening to sounds of silence for speech denoising. Adv. Neural. Inf. Process. Syst. 33, 9633\u20139648 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3269_CR94","doi-asserted-by":"crossref","unstructured":"W. Xu, Z. Chen, Z. Tan, S. Lv, R. Han, W. Zhou, L. Xie, MBTFNET: Multi-band temporal-frequency neural network for singing voice enhancement, in IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) (2023), pp. 1\u20138.","DOI":"10.1109\/ASRU57964.2023.10389670"},{"key":"3269_CR95","doi-asserted-by":"publisher","unstructured":"X. Xu, Y. Zhang, W. Tu, Y. Yang, An efficient and interpre Table speech enhancement network via deep dictionary learning, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 10481\u201310485. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10454487","DOI":"10.1109\/ICASSP48485.2024.10454487"},{"key":"3269_CR96","doi-asserted-by":"publisher","unstructured":"H. Xue, X. Peng, Y. Lu, Low-latency speech enhancement via speech token generation, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 661\u2013665. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10454417","DOI":"10.1109\/ICASSP48485.2024.10454417"},{"key":"3269_CR97","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2022.3147845","volume":"60","author":"X Yang","year":"2022","unstructured":"X. Yang, J. Zhang, C. Chen, D. Yang, An efficient and lightweight CNN model with soft quantification for ship detection in SAR images. IEEE Trans. Geosci. Remote Sens. 60, 1\u201313 (2022). https:\/\/doi.org\/10.1109\/TGRS.2022.3147845","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"issue":"7","key":"3269_CR98","doi-asserted-by":"publisher","first-page":"4051","DOI":"10.1007\/s00034-023-02378-1","volume":"42","author":"S Yechuri","year":"2023","unstructured":"S. Yechuri, S. Vanambathina, A nested U-net with efficient channel attention and D3Net for speech enhancement. Circuits Syst. Signal Process. 42(7), 4051\u20134071 (2023). https:\/\/doi.org\/10.1007\/s00034-023-02378-1","journal-title":"Circuits Syst. Signal Process."},{"key":"3269_CR99","doi-asserted-by":"crossref","unstructured":"J. Yu, Y. Luo, Efficient monaural speech enhancement with universal sample rate band-split RNN, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023), pp. 1\u20135.","DOI":"10.1109\/ICASSP49357.2023.10096020"},{"key":"3269_CR100","doi-asserted-by":"crossref","unstructured":"M. Yu, Y. Xu, C. Zhang, S.X. Zhang, D. Yu, Neuralecho: Hybrid of full-band and sub-band recurrent neural network for acoustic echo cancellation and speech enhancement, in IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) (2023), pp. 1\u20138","DOI":"10.1109\/ASRU57964.2023.10389728"},{"key":"3269_CR101","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3377214","author":"X Yu","year":"2024","unstructured":"X. Yu, M.A.A. Al-qaness, Human activity recognition using deep residual convolutional network based on wearable sensors. IEEE J. Biomed. Health Inform. (2024). https:\/\/doi.org\/10.1109\/JBHI.2024.3377214","journal-title":"IEEE J. Biomed. Health Inform."},{"issue":"1","key":"3269_CR102","doi-asserted-by":"publisher","first-page":"20620","DOI":"10.1038\/s41598-022-24909-2","volume":"12","author":"C Zhang","year":"2022","unstructured":"C. Zhang, L. Wang, Y. Qiao, W. Gao, A multi-scale feature extraction fusion model for human activity recognition. Sci. Rep. 12(1), 20620 (2022). https:\/\/doi.org\/10.1038\/s41598-022-24909-2","journal-title":"Sci. Rep."},{"key":"3269_CR103","doi-asserted-by":"publisher","first-page":"1404","DOI":"10.1109\/TASLP.2020.2987574","volume":"28","author":"Q Zhang","year":"2020","unstructured":"Q. Zhang, A. Nicolson, M. Wang, K.K. Paliwal, C. Wang, DeepMMSE: A deep learning approach to MMSE-based noise power spectral density estimation. IEEE ACM Trans. Audio Speech Lang. Process. 28, 1404\u20131415 (2020). https:\/\/doi.org\/10.1109\/TASLP.2020.2987574","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"key":"3269_CR104","doi-asserted-by":"publisher","unstructured":"S. Zhang, Z. Qiu, D. Takeuchi, N. Harada, S. Makino, Unrestricted global phase bias-aware single-channel speech enhancement with conformer-based metric GAN, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 1026\u20131030. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10453452","DOI":"10.1109\/ICASSP48485.2024.10453452"},{"key":"3269_CR105","doi-asserted-by":"publisher","unstructured":"W. Zhang, K. Saijo, Z.Q. Wang, S. Watanabe, Y. Qian, Toward universal speech enhancement for diverse input conditions, in IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) (2023), pp. 1\u20136. https:\/\/doi.org\/10.1109\/ASRU59171.2023.10375585","DOI":"10.1109\/ASRU59171.2023.10375585"},{"key":"3269_CR106","doi-asserted-by":"publisher","unstructured":"C. Zhao, S. He, X. Zhang, SICRN: advancing speech enhancement through state space model and inplace convolution techniques, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2024), pp. 10506\u201310510. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10454733","DOI":"10.1109\/ICASSP48485.2024.10454733"},{"key":"3269_CR107","doi-asserted-by":"publisher","unstructured":"C. Zheng, X. Peng, Y. Zhang, S. Srinivasan, Y. Lu, Interactive speech and noise modeling for speech enhancement, in Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35 (2021), pp. 14549\u201314557. https:\/\/doi.org\/10.1609\/aaai.v35i16.17747","DOI":"10.1609\/aaai.v35i16.17747"},{"key":"3269_CR108","doi-asserted-by":"publisher","unstructured":"C. Zheng, Y. Zhou, X. Peng, Y. Zhang, Y. Lu, Real-time speech enhancement with dynamic attention span, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10095361","DOI":"10.1109\/ICASSP49357.2023.10095361"},{"key":"3269_CR109","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1109\/OJSP.2023.3305948","volume":"5","author":"K Zmolikova","year":"2023","unstructured":"K. Zmolikova, M.S. Pedersen, J. Jensen, Masked spectrogram prediction for unsupervised domain adaptation in speech enhancement. IEEE Open J. Signal Process. 5, 274\u2013283 (2023). https:\/\/doi.org\/10.1109\/OJSP.2023.3305948","journal-title":"IEEE Open J. Signal Process."},{"key":"3269_CR110","doi-asserted-by":"publisher","unstructured":"Q. Zou, C. Yuan, Y. Sun, Non-negative matrix factorization speech enhancement method based on constraints of temporal continuity, in IEEE 3rd Information Technology, Networking, Electronic and Automation Control Conference (ITNEC) (2019), pp. 2057\u20132061. https:\/\/doi.org\/10.1109\/ITNEC.2019.8729143","DOI":"10.1109\/ITNEC.2019.8729143"}],"updated-by":[{"DOI":"10.1007\/s00034-025-03396-z","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T00:00:00Z","timestamp":1764201600000}}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03269-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-025-03269-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-025-03269-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T09:14:10Z","timestamp":1774602850000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-025-03269-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,9]]},"references-count":110,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["3269"],"URL":"https:\/\/doi.org\/10.1007\/s00034-025-03269-5","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,9]]},"assertion":[{"value":"3 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 July 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 July 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 October 2025","order":6,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Update","order":7,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The original online version of this article was revised and the order of author names has been corrected","order":8,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 November 2025","order":9,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Correction","order":10,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A Correction to this paper has been published:","order":11,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"https:\/\/doi.org\/10.1007\/s00034-025-03396-z","URL":"https:\/\/doi.org\/10.1007\/s00034-025-03396-z","order":12,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared that they have no conflict of interest to this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}