{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T01:39:09Z","timestamp":1772674749803,"version":"3.50.1"},"reference-count":73,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2023,7,26]],"date-time":"2023-07-26T00:00:00Z","timestamp":1690329600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,26]],"date-time":"2023-07-26T00:00:00Z","timestamp":1690329600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s00034-023-02455-7","type":"journal-article","created":{"date-parts":[[2023,7,26]],"date-time":"2023-07-26T14:02:43Z","timestamp":1690380163000},"page":"7467-7493","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Multi-stage Progressive Learning-Based Speech Enhancement Using Time\u2013Frequency Attentive Squeezed Temporal Convolutional Networks"],"prefix":"10.1007","volume":"42","author":[{"given":"Chaitanya","family":"Jannu","sequence":"first","affiliation":[]},{"given":"Sunny Dayal","family":"Vanambathina","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,26]]},"reference":[{"key":"2455_CR1","doi-asserted-by":"crossref","unstructured":"D. Baby, S. Verhulst, Sergan: speech enhancement using relativistic generative adversarial networks with gradient penalty, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), (IEEE, 2019), pp. 106\u2013110","DOI":"10.1109\/ICASSP.2019.8683799"},{"key":"2455_CR2","unstructured":"S. Bai, J.Z. Kolter, V. Koltun, An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271 (2018)"},{"issue":"2","key":"2455_CR3","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S Boll","year":"1979","unstructured":"S. Boll, Suppression of acoustic noise in speech using spectral subtraction. IEEE Trans. Acoust. Speech Signal Process. 27(2), 113\u2013120 (1979)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"6","key":"2455_CR4","doi-asserted-by":"publisher","first-page":"4705","DOI":"10.1121\/1.4986931","volume":"141","author":"J Chen","year":"2017","unstructured":"J. Chen, D. Wang, Long short-term memory for speaker generalization in supervised speech separation. J. Acoust. Soc. Am. 141(6), 4705\u20134714 (2017)","journal-title":"J. Acoust. Soc. Am."},{"key":"2455_CR5","doi-asserted-by":"crossref","unstructured":"Z. Chen, Y. Huang, J. Li et\u00a0al. Improving mask learning based speech enhancement system with restoration layers and residual connection, in INTERSPEECH (2017), pp. 3632\u20133636","DOI":"10.21437\/Interspeech.2017-515"},{"key":"2455_CR6","doi-asserted-by":"crossref","unstructured":"A. Defossez, G. Synnaeve, Y. Adi, Real time speech enhancement in the waveform domain. arXiv preprint arXiv:2006.12847 (2020)","DOI":"10.21437\/Interspeech.2020-2409"},{"issue":"6","key":"2455_CR7","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Y. Ephraim, D. Malah, Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator. IEEE Trans. Acoust. Speech Signal Process. 32(6), 1109\u20131121 (1984)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"2","key":"2455_CR8","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1109\/TASSP.1985.1164550","volume":"33","author":"Y Ephraim","year":"1985","unstructured":"Y. Ephraim, D. Malah, Speech enhancement using a minimum mean-square error log-spectral amplitude estimator. IEEE Trans. Acoust. Speech Signal Process. 33(2), 443\u2013445 (1985)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"2455_CR9","unstructured":"C. Fan, J. Tao, B. Liu et\u00a0al. Deep attention fusion feature for speech separation with end-to-end post-filter method. arXiv preprint arXiv:2003.07544 (2020)"},{"key":"2455_CR10","doi-asserted-by":"crossref","unstructured":"Y.A. Farha, J. Gall, MS-TCN: multi-stage temporal convolutional network for action segmentation, in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2019), pp. 3575\u20133584","DOI":"10.1109\/CVPR.2019.00369"},{"key":"2455_CR11","unstructured":"S.W. Fu, C.F. Liao, Y. Tsao et\u00a0al., Metricgan: generative adversarial networks based black-box metric scores optimization for speech enhancement, in International Conference on Machine Learning, PMLR (2019) pp. 2031\u20132041"},{"key":"2455_CR12","doi-asserted-by":"crossref","unstructured":"R. Giri, U. Isik, A. Krishnaswamy, Attention Wave-U-Net for speech enhancement, in 2019 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA) (IEEE, 2019), pp. 249\u2013253","DOI":"10.1109\/WASPAA.2019.8937186"},{"key":"2455_CR13","doi-asserted-by":"crossref","unstructured":"X. Hao, X. Su, S. Wen et al., Masking and inpainting: a two-stage speech enhancement approach for low snr and non-stationary noise, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2020), pp. 6959\u20136963","DOI":"10.1109\/ICASSP40776.2020.9053188"},{"key":"2455_CR14","doi-asserted-by":"crossref","unstructured":"K. He, X. Zhang, S. Ren et\u00a0al., Delving deep into rectifiers: surpassing human-level performance on imagenet classification, in Proceedings of the IEEE International Conference on Computer Vision (2015), pp 1026\u20131034","DOI":"10.1109\/ICCV.2015.123"},{"key":"2455_CR15","doi-asserted-by":"crossref","unstructured":"J. Hu, L. Shen, G. Sun, Squeeze-and-excitation networks, in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2018), pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"issue":"12","key":"2455_CR16","doi-asserted-by":"publisher","first-page":"1458","DOI":"10.1016\/j.apacoust.2013.06.001","volume":"74","author":"X Hu","year":"2013","unstructured":"X. Hu, S. Wang, C. Zheng et al., A cepstrum-based preprocessing and postprocessing for speech enhancement in adverse environments. Appl. Acoust. 74(12), 1458\u20131462 (2013)","journal-title":"Appl. Acoust."},{"key":"2455_CR17","doi-asserted-by":"publisher","DOI":"10.1142\/S0219467824500438","author":"C Jannu","year":"2023","unstructured":"C. Jannu, S.D. Vanambathina, Shuffle attention u-Net for speech enhancement in time domain. Int. J. Image Graph. (2023). https:\/\/doi.org\/10.1142\/S0219467824500438","journal-title":"Int. J. Image Graph."},{"key":"2455_CR18","doi-asserted-by":"crossref","unstructured":"J. Kim, M. El-Khamy, J. Lee, T-GSA: transformer with gaussian-weighted self-attention for speech enhancement, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2020), pp. 6649\u20136653","DOI":"10.1109\/ICASSP40776.2020.9053591"},{"key":"2455_CR19","unstructured":"D.P. Kingma, J. Ba, Adam: a method for stochastic optimization 3rd int, in Conf. for Learning Representations, San (2014)"},{"key":"2455_CR20","doi-asserted-by":"crossref","unstructured":"V. Kishore, N. Tiwari, P. Paramasivam, Improved speech enhancement using TCN with multiple encoder\u2013decoder layers, in Interspeech (2020), pp. 4531\u20134535","DOI":"10.21437\/Interspeech.2020-3122"},{"key":"2455_CR21","doi-asserted-by":"crossref","unstructured":"Y. Koizumi, K. Yatabe, M. Delcroix et al., Speech enhancement using self-adaptation and multi-head self-attention, in ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2020), pp. 181\u2013185","DOI":"10.1109\/ICASSP40776.2020.9053214"},{"key":"2455_CR22","unstructured":"Y. Koyama, T. Vuong, S. Uhlich et al., Exploring the best loss function for DNN-based low-latency speech enhancement with temporal convolutional networks (2020). arXiv preprint arXiv:2005.11611"},{"key":"2455_CR23","first-page":"2422","volume":"2020","author":"A Li","year":"2020","unstructured":"A. Li, C. Zheng, C. Fan et al., A recursive network with dynamic attention for monaural speech enhancement. Proc. Interspeech 2020, 2422\u20132426 (2020)","journal-title":"Proc. Interspeech"},{"key":"2455_CR24","unstructured":"A. Li, C. Zheng, R. Peng et al., Two heads are better than one: a two-stage approach for monaural noise reduction in the complex domain (2020). arXiv preprint arXiv:2011.01561"},{"key":"2455_CR25","doi-asserted-by":"publisher","first-page":"108499","DOI":"10.1016\/j.apacoust.2021.108499","volume":"187","author":"A Li","year":"2022","unstructured":"A. Li, C. Zheng, L. Zhang et al., Glance and gaze: a collaborative learning framework for single-channel speech enhancement. Appl. Acoust. 187, 108499 (2022)","journal-title":"Appl. Acoust."},{"key":"2455_CR26","doi-asserted-by":"crossref","unstructured":"J. Lin, S. Niu, Z. Wei et al., Speech enhancement using forked generative adversarial networks with spectral subtraction, in Proceedings of Interspeech (2019)","DOI":"10.21437\/Interspeech.2019-2954"},{"key":"2455_CR27","doi-asserted-by":"crossref","unstructured":"J. Lin, S. Niu, A.J. Wijngaarden et al., Improved speech enhancement using a time-domain GAN with mask learning, in Proceedings of Interspeech 2020 (2020)","DOI":"10.21437\/Interspeech.2020-1946"},{"key":"2455_CR28","doi-asserted-by":"publisher","first-page":"3440","DOI":"10.1109\/TASLP.2021.3125143","volume":"29","author":"J Lin","year":"2021","unstructured":"J. Lin, AJd.L. van Wijngaarden, K.C. Wang et al., Speech enhancement using multi-stage self-attentive temporal convolutional networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 3440\u20133450 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR29","doi-asserted-by":"publisher","first-page":"1888","DOI":"10.1109\/TASLP.2020.2976193","volume":"28","author":"CL Liu","year":"2020","unstructured":"C.L. Liu, S.W. Fu, Y.J. Li et al., Multichannel speech enhancement by raw waveform-mapping using fully convolutional networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 28, 1888\u20131900 (2020)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR30","first-page":"588","volume":"49","author":"P Loizou","year":"2017","unstructured":"P. Loizou, Y. Hu, Noizeus: a noisy speech corpus for evaluation of speech enhancement algorithms. Speech Commun. 49, 588\u2013601 (2017)","journal-title":"Speech Commun."},{"issue":"8","key":"2455_CR31","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/TASLP.2019.2915167","volume":"27","author":"Y Luo","year":"2019","unstructured":"Y. Luo, N. Mesgarani, Conv-TasNet: surpassing ideal time-frequency magnitude masking for speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(8), 1256\u20131266 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR32","unstructured":"C. Macartney, T. Weyde, Improved speech enhancement with the wave-U-Net (2018). arXiv preprint arXiv:1811.11307"},{"issue":"4","key":"2455_CR33","doi-asserted-by":"publisher","first-page":"826","DOI":"10.1109\/TASLP.2014.2305833","volume":"22","author":"A Narayanan","year":"2014","unstructured":"A. Narayanan, D. Wang, Investigation of speech separation as a front-end for noise robust speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(4), 826\u2013835 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR34","unstructured":"A. van\u00a0den Oord, S. Dieleman, H. Zen et al., Wavenet: a generative model for raw audio, in 9th ISCA Speech Synthesis Workshop (2016), pp. 125\u2013125"},{"key":"2455_CR35","doi-asserted-by":"crossref","unstructured":"V. Panayotov, G. Chen, D. Povey et al., Librispeech: an ASR corpus based on public domain audio books, in 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2015), pp. 5206\u20135210","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"2455_CR36","doi-asserted-by":"crossref","unstructured":"A. Pandey, D. Wang, On adversarial training and loss functions for speech enhancement, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2018), pp. 5414\u20135418","DOI":"10.1109\/ICASSP.2018.8462614"},{"issue":"7","key":"2455_CR37","doi-asserted-by":"publisher","first-page":"1179","DOI":"10.1109\/TASLP.2019.2913512","volume":"27","author":"A Pandey","year":"2019","unstructured":"A. Pandey, D. Wang, A new framework for CNN-based speech enhancement in the time domain. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(7), 1179\u20131188 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR38","doi-asserted-by":"crossref","unstructured":"A. Pandey, D. Wang, TCNN: temporal convolutional neural network for real-time speech enhancement in the time domain, in ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2019), pp. 6875\u20136879","DOI":"10.1109\/ICASSP.2019.8683634"},{"key":"2455_CR39","doi-asserted-by":"publisher","first-page":"1270","DOI":"10.1109\/TASLP.2021.3064421","volume":"29","author":"A Pandey","year":"2021","unstructured":"A. Pandey, D. Wang, Dense CNN with self-attention for time-domain speech enhancement. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 1270\u20131279 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR40","doi-asserted-by":"crossref","unstructured":"S. Pascual, A. Bonafonte, J. Serra, Segan: speech enhancement generative adversarial network (2017). arXiv preprint arXiv:1703.09452","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"2455_CR41","doi-asserted-by":"crossref","unstructured":"H. Phan, H. Le Nguyen, O.Y. Ch\u00e9n et al., Self-attention generative adversarial network for speech enhancement, in ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2021), pp. 7103\u20137107","DOI":"10.1109\/ICASSP39728.2021.9414265"},{"key":"2455_CR42","doi-asserted-by":"crossref","unstructured":"C.K. Reddy, H. Dubey, V. Gopal et al., ICASSP 2021 deep noise suppression challenge, in ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2021), pp. 6623\u20136627","DOI":"10.1109\/ICASSP39728.2021.9415105"},{"key":"2455_CR43","doi-asserted-by":"crossref","unstructured":"C.K. Reddy, V. Gopal, R. Cutler, DNSMOS: a non-intrusive perceptual objective speech quality metric to evaluate noise suppressors, in ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2021), pp. 6493\u20136497","DOI":"10.1109\/ICASSP39728.2021.9414878"},{"key":"2455_CR44","doi-asserted-by":"crossref","unstructured":"A.W. Rix, J.G. Beerends, M.P. Hollier et al., Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs, in 2001 IEEE international conference on acoustics, speech, and signal processing. Proceedings (Cat. No. 01CH37221) (IEEE, 2001), pp. 749\u2013752","DOI":"10.1109\/ICASSP.2001.941023"},{"key":"2455_CR45","doi-asserted-by":"crossref","unstructured":"M.H. Soni, N. Shah, H.A. Patil Time-frequency masking-based speech enhancement using generative adversarial network, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2018), pp. 5039\u20135043","DOI":"10.1109\/ICASSP.2018.8462068"},{"issue":"7","key":"2455_CR46","doi-asserted-by":"publisher","first-page":"2125","DOI":"10.1109\/TASL.2011.2114881","volume":"19","author":"CH Taal","year":"2011","unstructured":"C.H. Taal, R.C. Hendriks, R. Heusdens et al., An algorithm for intelligibility prediction of time-frequency weighted noisy speech. IEEE Trans. Audio Speech Lang. Process. 19(7), 2125\u20132136 (2011)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2455_CR47","doi-asserted-by":"crossref","unstructured":"K. Tan, D. Wang, A convolutional recurrent neural network for real-time speech enhancement, in Interspeech (2018), pp. 3229\u20133233","DOI":"10.21437\/Interspeech.2018-1405"},{"issue":"1","key":"2455_CR48","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1109\/TASLP.2018.2876171","volume":"27","author":"K Tan","year":"2018","unstructured":"K. Tan, J. Chen, D. Wang, Gated residual networks with dilated convolutions for monaural speech enhancement. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(1), 189\u2013198 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR49","doi-asserted-by":"crossref","unstructured":"C. Tang, C. Luo, Z. Zhao et al., Joint time-frequency and time domain learning for speech enhancement, in Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence (2021), pp. 3816\u20133822","DOI":"10.24963\/ijcai.2020\/528"},{"issue":"2","key":"2455_CR50","doi-asserted-by":"publisher","first-page":"1454","DOI":"10.1016\/j.jfranklin.2022.11.004","volume":"360","author":"H Tao","year":"2023","unstructured":"H. Tao, J. Qiu, Y. Chen et al., Unsupervised cross-domain rolling bearing fault diagnosis based on time-frequency information fusion. J. Franklin Inst. 360(2), 1454\u20131477 (2023)","journal-title":"J. Franklin Inst."},{"key":"2455_CR51","doi-asserted-by":"crossref","unstructured":"J. Thiemann, N. Ito, E. Vincent, The diverse environments multi-channel acoustic noise database (demand): a database of multichannel environmental noise recordings, in Proceedings of Meetings on Acoustics ICA2013 (Acoustical Society of America, 2013), p. 035081","DOI":"10.1121\/1.4799597"},{"key":"2455_CR52","unstructured":"C. Valentini-Botinhao, et al., Noisy Speech Database for Training Speech Enhancement Algorithms and TTS Models. University of Edinburgh School of Informatics Centre for Speech Technology Research (CSTR) (2017)"},{"issue":"3","key":"2455_CR53","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1016\/0167-6393(93)90095-3","volume":"12","author":"A Varga","year":"1993","unstructured":"A. Varga, H.J. Steeneken, Assessment for automatic speech recognition: II. NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems. Speech Commun. 12(3), 247\u2013251 (1993)","journal-title":"Speech Commun."},{"key":"2455_CR54","unstructured":"A. Vaswani, N. Shazeer, N. Parmar et al., Attention is all you need, in Advances in Neural Information Processing Systems, vol. 30"},{"key":"2455_CR55","doi-asserted-by":"crossref","unstructured":"C. Veaux, J. Yamagishi, S. King, The voice bank corpus: design, collection and data analysis of a large regional accent speech database, in 2013 International Conference Oriental COCOSDA Held Jointly with 2013 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA\/CASLRE) (IEEE, 2013), pp. 1\u20134","DOI":"10.1109\/ICSDA.2013.6709856"},{"key":"2455_CR56","doi-asserted-by":"crossref","unstructured":"D. Wang, On ideal binary mask as the computational goal of auditory scene analysis, in Speech Separation by Humans and Machines (2005), pp. 181\u2013197","DOI":"10.1007\/0-387-22794-6_12"},{"key":"2455_CR57","doi-asserted-by":"crossref","unstructured":"Q. Wang, H. Muckenhirn, K. Wilson et al., Voicefilter: targeted voice separation by speaker-conditioned spectrogram masking. arXiv preprint arXiv:1810.04826 (2018)","DOI":"10.21437\/Interspeech.2019-1101"},{"key":"2455_CR58","doi-asserted-by":"crossref","unstructured":"Q. Wang, B. Wu, P. Zhu et al., ECA-Net: efficient channel attention for deep convolutional neural networks, in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2020), pp. 11534\u201311542","DOI":"10.1109\/CVPR42600.2020.01155"},{"issue":"7","key":"2455_CR59","doi-asserted-by":"publisher","first-page":"1381","DOI":"10.1109\/TASL.2013.2250961","volume":"21","author":"Y Wang","year":"2013","unstructured":"Y. Wang, D. Wang, Towards scaling up classification-based speech separation. IEEE Trans. Audio Speech Lang. Process. 21(7), 1381\u20131390 (2013)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"12","key":"2455_CR60","doi-asserted-by":"publisher","first-page":"1849","DOI":"10.1109\/TASLP.2014.2352935","volume":"22","author":"Y Wang","year":"2014","unstructured":"Y. Wang, A. Narayanan, D. Wang, On training targets for supervised speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(12), 1849\u20131858 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR61","doi-asserted-by":"crossref","unstructured":"S. Woo, J. Park, J.Y. Lee et al., CBAM: convolutional block attention module, in Proceedings of the European Conference on Computer Vision (ECCV) (2018), pp. 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"2455_CR62","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1109\/LSP.2021.3128374","volume":"29","author":"X Xiang","year":"2021","unstructured":"X. Xiang, X. Zhang, H. Chen, A nested u-net with self-attention and dense connectivity for monaural speech enhancement. IEEE Signal Process. Lett. 29, 105\u2013109 (2021)","journal-title":"IEEE Signal Process. Lett."},{"issue":"1","key":"2455_CR63","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1109\/TASLP.2014.2364452","volume":"23","author":"Y Xu","year":"2014","unstructured":"Y. Xu, J. Du, L.R. Dai et al., A regression approach to speech enhancement based on deep neural networks. IEEE\/ACM Trans. Audio Speech Lang. Process. 23(1), 7\u201319 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR64","unstructured":"F. Yu, V. Koltun, Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"2455_CR65","unstructured":"Q. Zhang, A. Nicolson, M. Wang, et al., Monaural speech enhancement using a multi-branch temporal convolutional network. arXiv preprint arXiv:1912.12023 (2019)"},{"key":"2455_CR66","doi-asserted-by":"publisher","first-page":"1404","DOI":"10.1109\/TASLP.2020.2987441","volume":"28","author":"Q Zhang","year":"2020","unstructured":"Q. Zhang, A. Nicolson, M. Wang et al., DeepMMSE: a deep learning approach to MMSE-based noise power spectral density estimation. IEEE\/ACM Trans. Audio Speech Lang. Process. 28, 1404\u20131415 (2020)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR67","doi-asserted-by":"crossref","first-page":"166","DOI":"10.21437\/Interspeech.2021-46","volume":"2021","author":"Q Zhang","year":"2021","unstructured":"Q. Zhang, Q. Song, A. Nicolson et al., Temporal convolutional network with frequency dimension adaptive attention for speech enhancement. Proc. Interspeech 2021, 166\u2013170 (2021)","journal-title":"Proc. Interspeech"},{"key":"2455_CR68","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1109\/TASLP.2022.3225649","volume":"31","author":"Q Zhang","year":"2022","unstructured":"Q. Zhang, X. Qian, Z. Ni et al., A time-frequency attention module for neural speech enhancement. IEEE\/ACM Trans. Audio Speech Lang. Process. 31, 462\u2013475 (2022)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR69","doi-asserted-by":"crossref","unstructured":"Y. Zhao, D. Wang, Noisy-reverberant speech enhancement using denseunet with time-frequency attention, in Interspeech (2020), pp. 3261\u20133265","DOI":"10.21437\/Interspeech.2020-2952"},{"key":"2455_CR70","doi-asserted-by":"publisher","first-page":"1598","DOI":"10.1109\/TASLP.2020.2995273","volume":"28","author":"Y Zhao","year":"2020","unstructured":"Y. Zhao, D. Wang, B. Xu et al., Monaural speech dereverberation using temporal convolutional networks with self attention. IEEE\/ACM Trans. Audio Speech Lang. Process. 28, 1598\u20131607 (2020)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2455_CR71","doi-asserted-by":"crossref","unstructured":"C. Zheng, X. Peng, Y. Zhang et al., Interactive speech and noise modeling for speech enhancement, in Proceedings of the AAAI Conference on Artificial Intelligence (2021), pp. 14549\u201314557","DOI":"10.1609\/aaai.v35i16.17710"},{"issue":"18","key":"2455_CR72","doi-asserted-by":"publisher","first-page":"10139","DOI":"10.1002\/rnc.6354","volume":"32","author":"C Zhou","year":"2022","unstructured":"C. Zhou, H. Tao, Y. Chen et al., Robust point-to-point iterative learning control for constrained systems: a minimum energy approach. Int. J. Robust Nonlinear Control 32(18), 10139\u201310161 (2022)","journal-title":"Int. J. Robust Nonlinear Control"},{"key":"2455_CR73","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3225381","author":"Z Zhuang","year":"2022","unstructured":"Z. Zhuang, H. Tao, Y. Chen et al., An optimal iterative learning control approach for linear systems with nonuniform trial lengths under input constraints. IEEE Trans. Syst. Man Cybern. Syst. (2022). https:\/\/doi.org\/10.1109\/TSMC.2022.3225381","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02455-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-023-02455-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02455-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T02:51:10Z","timestamp":1729824670000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-023-02455-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,26]]},"references-count":73,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["2455"],"URL":"https:\/\/doi.org\/10.1007\/s00034-023-02455-7","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,7,26]]},"assertion":[{"value":"12 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 July 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 July 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared that they have no conflict of interest to this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}