{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T17:56:11Z","timestamp":1776880571341,"version":"3.51.2"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T00:00:00Z","timestamp":1757030400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T00:00:00Z","timestamp":1757030400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62271344"],"award-info":[{"award-number":["62271344"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04534-5","type":"journal-article","created":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T15:52:15Z","timestamp":1757087535000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Speech enhancement using complex depthwise convolutional recurrent network with self-attention mechanism"],"prefix":"10.1007","volume":"19","author":[{"given":"Yasir","family":"Iqbal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anjum","family":"Iqbal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajia","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ikram","family":"Azaz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiyun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ijaz","family":"Hussain","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanzhang","family":"Geng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,5]]},"reference":[{"key":"4534_CR1","doi-asserted-by":"publisher","first-page":"101549","DOI":"10.1016\/j.csl.2023.101549","volume":"83","author":"C Karthikeyan","year":"2024","unstructured":"Karthikeyan, C., Kumar, T.R., Babu, D.V., Baskar, M., Jayaraman, R., Shahid, M.: Speech enhancement approach for body-conducted unvoiced speech based on Taylor\u2013Boltzmann machines trained DNN. Comput. Speech Lang. 83, 101549 (2024)","journal-title":"Comput. Speech Lang."},{"key":"4534_CR2","doi-asserted-by":"crossref","unstructured":"Nakadai, K., Hidai, K., Okuno, H.G., Kitano, H.: Real-time speaker localization and speech separation by audio-visual integration, in Proceedings IEEE International Conference on Robotics and Automation (Cat. No. 02CH37292), 2002, vol. 1: IEEE, pp. 1043\u20131049. (2002)","DOI":"10.1109\/ROBOT.2002.1013493"},{"key":"4534_CR3","doi-asserted-by":"publisher","first-page":"108499","DOI":"10.1016\/j.apacoust.2021.108499","volume":"187","author":"A Li","year":"2022","unstructured":"Li, A., Zheng, C., Zhang, L., Li, X.: Glance and gaze: A collaborative learning framework for single-channel speech enhancement. Appl. Acoust. 187, 108499 (2022)","journal-title":"Appl. Acoust."},{"key":"4534_CR4","doi-asserted-by":"publisher","unstructured":"Jannu, C., Burra, M., Vanambathina, S.D., Parisae, V., Krishna, C.V.M., Madhumati, G.L.: Single channel speech enhancement using a complex Dual-Path multi axial transformer with frequency prompt. Circuits Syst. Signal. Process., 44, 6, pp. 4224\u20134257, 2025\/06\/01 2025, https:\/\/doi.org\/10.1007\/s00034-025-03010-2","DOI":"10.1007\/s00034-025-03010-2"},{"issue":"2","key":"4534_CR5","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1109\/JSTSP.2019.2908760","volume":"13","author":"Y Sun","year":"2019","unstructured":"Sun, Y., Xian, Y., Wang, W., Naqvi, S.M.: Monaural source separation in complex domain with long short-term memory neural network. IEEE J. Selec. Topics Signal Process. 13(2), 359\u2013369 (2019)","journal-title":"IEEE J. Selec. Topics Signal Process."},{"issue":"Suppl 3","key":"4534_CR6","doi-asserted-by":"publisher","first-page":"3651","DOI":"10.1007\/s10462-023-10612-2","volume":"56","author":"P Ochieng","year":"2023","unstructured":"Ochieng, P.: Deep neural network techniques for monaural speech enhancement and separation: State of the Art analysis. Artif. Intell. Rev. 56(Suppl 3), 3651\u20133703 (2023)","journal-title":"Artif. Intell. Rev."},{"key":"4534_CR7","doi-asserted-by":"publisher","first-page":"101618","DOI":"10.1016\/j.csl.2024.101618","volume":"86","author":"Y Li","year":"2024","unstructured":"Li, Y., Sun, M., Zhang, X.: Scale-aware dual-branch complex convolutional recurrent network for monaural speech enhancement. Comput. Speech Lang. 86, 101618 (2024)","journal-title":"Comput. Speech Lang."},{"key":"4534_CR8","unstructured":"Pandey, A., Wang, D.: Dense CNN with Self-Attention for Time-Domain speech enhancement, (in eng), no. 2329\u20139290 (Print)."},{"key":"4534_CR9","doi-asserted-by":"crossref","unstructured":"Rethage, D., Pons, J., Serra, X.: A wavenet for speech denoising, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018: IEEE, pp. 5069\u20135073. (2018)","DOI":"10.1109\/ICASSP.2018.8462417"},{"key":"4534_CR10","doi-asserted-by":"crossref","unstructured":"Abdulbaqi, J., Gu, Y., Chen, S., Marsic, I.: Residual recurrent neural network for speech enhancement, in ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP),: IEEE, pp. 6659\u20136663. (2020)","DOI":"10.1109\/ICASSP40776.2020.9053544"},{"key":"4534_CR11","doi-asserted-by":"publisher","unstructured":"Saleem, N., Gunawan, T.S., Dhahbi, S., Bourouis, S.: Time domain speech enhancement with CNN and time-attention transformer. Digit. Signal Proc., 147, p. 104408, 2024\/04\/01\/ 2024, doi: https:\/\/doi.org\/10.1016\/j.dsp.2024.104408","DOI":"10.1016\/j.dsp.2024.104408"},{"key":"4534_CR12","doi-asserted-by":"publisher","unstructured":"Jannu, C., Vanambathina, S.D.: Multi-stage Progressive Learning-Based Speech Enhancement Using Time\u2013Frequency Attentive Squeezed Temporal Convolutional Networks, Circuits, Systems, and Signal Processing, vol. 42, no. 12, pp. 7467\u20137493, 2023\/12\/01 2023. https:\/\/doi.org\/10.1007\/s00034-023-02455-7","DOI":"10.1007\/s00034-023-02455-7"},{"issue":"4","key":"4534_CR13","first-page":"10907","volume":"46","author":"V Parisae","year":"2024","unstructured":"Parisae, V., Nagakishore Bhavanam, S.: Multi scale encoder-decoder network with time frequency attention and s-tcn for single channel speech enhancement. J. Intell. Fuzzy Syst. 46(4), 10907\u201310907 (2024)","journal-title":"J. Intell. Fuzzy Syst."},{"key":"4534_CR14","doi-asserted-by":"crossref","unstructured":"Parisae, V., Nagakishore Bhavanam, S.: Stacked u-net with time\u2013frequency attention and deep connection net for single channel speech enhancement. Int. J. Image Graphics, p. 2550067, (2024)","DOI":"10.1142\/S0219467825500676"},{"key":"4534_CR15","doi-asserted-by":"crossref","unstructured":"Phan, H., et al.: Self-attention generative adversarial network for speech enhancement, in ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP),: IEEE, pp. 7103\u20137107. (2021)","DOI":"10.1109\/ICASSP39728.2021.9414265"},{"key":"4534_CR16","doi-asserted-by":"publisher","unstructured":"Wang, D., Chen, J.: Supervised Speech Separation Based on Deep Learning: An Overview, IEEE\/ACM Transactions on Audio, Speech, and Language Processing, vol. 26, no. 10, pp. 1702\u20131726, (2018). https:\/\/doi.org\/10.1109\/TASLP.2018.2842159","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"4534_CR17","doi-asserted-by":"crossref","unstructured":"Hu, Y., et al.: DCCRN: Deep complex convolution recurrent network for phase-aware speech enhancement, arXiv preprint arXiv:2008.00264, (2020)","DOI":"10.21437\/Interspeech.2020-2537"},{"key":"4534_CR18","doi-asserted-by":"publisher","unstructured":"Vanambathina, S.D., Burra, M., Edupalli, B., Vallem, E.R., Nellore, V.S.: Real time speech enhancement using densely connected neural networks and squeezed Temporal convolutional modules. Multimedia Tools Appl., 83, 17, pp. 50289\u201350305, 2024\/05\/01 2024, https:\/\/doi.org\/10.1007\/s11042-023-17492-2","DOI":"10.1007\/s11042-023-17492-2"},{"key":"4534_CR19","doi-asserted-by":"crossref","unstructured":"Lu, Y.-X., Ai, Y., Ling, Z.-H.: MP-SENet: A speech enhancement model with parallel denoising of magnitude and phase spectra, arXiv preprint arXiv:2305.13686, (2023)","DOI":"10.21437\/Interspeech.2023-1441"},{"key":"4534_CR20","doi-asserted-by":"crossref","unstructured":"Ouyang, Z., Yu, H., Zhu, W.-P., Champagne, B.: A fully convolutional neural network for complex spectrogram processing in speech enhancement, in ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP),: IEEE, pp. 5756\u20135760. (2019)","DOI":"10.1109\/ICASSP.2019.8683423"},{"key":"4534_CR21","doi-asserted-by":"crossref","unstructured":"Sivarambabu, P., Agrawal, R., Tirumala, A., Subani, S.M., Parisae, V., Nukala, S.S.: Enhancing cloud security through AI-Driven intrusion detection utilizing deep learning methods and autoencoder technology. Generative Artif. Intelligence: Concepts Appl., pp. 249\u2013264, (2025)","DOI":"10.1002\/9781394209835.ch15"},{"issue":"1","key":"4534_CR22","doi-asserted-by":"publisher","first-page":"e0291240","DOI":"10.1371\/journal.pone.0291240","volume":"19","author":"Z Li","year":"2024","unstructured":"Li, Z., Basit, A., Daraz, A., Jan, A.: Deep causal speech enhancement and recognition using efficient long-short term memory recurrent neural network. PLOS ONE. 19(1), e0291240 (2024). https:\/\/doi.org\/10.1371\/journal.pone.0291240","journal-title":"PLOS ONE"},{"key":"4534_CR23","doi-asserted-by":"crossref","unstructured":"Tan, K., Wang, D.: A convolutional recurrent neural network for real-time speech enhancement, in Interspeech, vol. 2018, pp. 3229\u20133233. (2018)","DOI":"10.21437\/Interspeech.2018-1405"},{"key":"4534_CR24","unstructured":"Pandey, A., Wang, D.: Dual-path self-attention RNN for real-time speech enhancement, arXiv preprint arXiv:2010.12713, (2020)"},{"issue":"2","key":"4534_CR25","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1007\/s11042-024-19076-0","volume":"84","author":"V Parisae","year":"2025","unstructured":"Parisae, V., Bhavanam, S.N.: Adaptive attention mechanism for single channel speech enhancement. Multimedia Tools Appl. 84(2), 831\u2013856 (2025)","journal-title":"Multimedia Tools Appl."},{"issue":"1","key":"4534_CR26","doi-asserted-by":"publisher","first-page":"e70016","DOI":"10.1111\/coin.70016","volume":"41","author":"C Jannu","year":"2025","unstructured":"Jannu, C., Burra, M., Vanambathina, S.D., Parisae, V.: Real-Time single channel speech enhancement using triple attention and stacked Squeeze\u2010TCN. Comput. Intell. 41(1), e70016 (2025)","journal-title":"Comput. Intell."},{"key":"4534_CR27","doi-asserted-by":"crossref","unstructured":"Vanambathina, S.D., Nandyala, S., Jannu, C., Sirisha Devi, J., Yechuri, S., Parisae, V.: Speech enhancement using U-net-based progressive learning with squeeze-TCN, in International Conference on Advances in Distributed Computing and Machine Learning,: Springer, pp. 419\u2013432. (2024)","DOI":"10.1007\/978-981-97-3523-5_31"},{"key":"4534_CR28","unstructured":"Nguyen, T., Raich, R., Fern, X.: IEEE 27th International Workshop on Machine Learning for Signal Processing (MLSP), ed: IEEE, (2017)"},{"key":"4534_CR29","doi-asserted-by":"crossref","unstructured":"Tokala, V., Grinstein, E., Brookes, M., Doclo, S., Jensen, J., Naylor, P.A.: Binaural Speech Enhancement Using Deep Complex Convolutional Transformer Networks, in ICASSP 2024\u20132024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP),: IEEE, pp. 681\u2013685. (2024)","DOI":"10.1109\/ICASSP48485.2024.10447090"},{"key":"4534_CR30","doi-asserted-by":"publisher","unstructured":"Burra, M., Vanambathina, S.D., A, V.A.L., Ch, L., N, S.K.: Cross channel interaction based ECA-Net using gated recurrent convolutional network for speech enhancement. Multimedia Tools Appl., 2024\/06\/28 2024, https:\/\/doi.org\/10.1007\/s11042-024-19744-1","DOI":"10.1007\/s11042-024-19744-1"},{"key":"4534_CR31","doi-asserted-by":"crossref","unstructured":"Grais, E.M., Ward, D., Plumbley, M.D.: Raw multi-channel audio source separation using multi-resolution convolutional auto-encoders, in 26th European Signal Processing Conference (EUSIPCO), 2018: IEEE, pp. 1577\u20131581. (2018)","DOI":"10.23919\/EUSIPCO.2018.8553571"},{"key":"4534_CR32","doi-asserted-by":"crossref","unstructured":"Iqbal, Y., Zhang, T., Fahad, M., Iqbal, A., Geng, Y., Zhao, X.: Speech enhancement using deep complex convolutional neural network (DCCNN) model, Signal, Image and Video Processing, pp. 1\u201318, (2024)","DOI":"10.1007\/s11760-024-03500-x"},{"issue":"5","key":"4534_CR33","doi-asserted-by":"publisher","first-page":"2225","DOI":"10.1007\/s10115-022-01818-x","volume":"65","author":"J-G Jang","year":"2023","unstructured":"Jang, J.-G., Quan, C., Lee, H.D., Kang, U.: Falcon: Lightweight and accurate Convolution based on depthwise separable Convolution. Knowl. Inf. Syst. 65(5), 2225\u20132249 (2023)","journal-title":"Knowl. Inf. Syst."},{"key":"4534_CR34","unstructured":"Garofolo, J., Graff, D., Paul, D., Pallett, D.: Csr-i (wsj0) complete ldc93s6a. Web Download Philadelphia: Linguistic Data Consortium, 83, (1993)"},{"key":"4534_CR35","doi-asserted-by":"crossref","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning representations by back-propagating errors, nature, vol. 323, no. 6088, pp. 533\u2013536, (1986)","DOI":"10.1038\/323533a0"},{"key":"4534_CR36","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks, in Proceedings of the thirteenth international conference on artificial intelligence and statistics,: JMLR Workshop and Conference Proceedings, pp. 249\u2013256. (2010)"},{"key":"4534_CR37","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization, arXiv preprint arXiv:1412.6980, (2014)"},{"key":"4534_CR38","doi-asserted-by":"crossref","unstructured":"Rix, A.W., Beerends, J.G., Hollier, M.P., Hekstra, A.P.: Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs, in IEEE international conference on acoustics, speech, and signal processing. Proceedings (Cat. No. 01CH37221), 2001, vol. 2: IEEE, pp. 749\u2013752. (2001)","DOI":"10.1109\/ICASSP.2001.941023"},{"key":"4534_CR39","doi-asserted-by":"crossref","unstructured":"Taal, C.H., Hendriks, R.C., Heusdens, R., Jensen, J.: A short-time objective intelligibility measure for time-frequency weighted noisy speech, in IEEE international conference on acoustics, speech and signal processing, 2010: IEEE, pp. 4214\u20134217. (2010)","DOI":"10.1109\/ICASSP.2010.5495701"},{"key":"4534_CR40","doi-asserted-by":"publisher","unstructured":"Wahab, F.E., Ye, Z., Saleem, N., Ullah, R.: Compact deep neural networks for real-time speech enhancement on resource-limited devices. Speech Commun., 156, p. 103008, 2024\/01\/01\/ 2024, doi: https:\/\/doi.org\/10.1016\/j.specom.2023.103008","DOI":"10.1016\/j.specom.2023.103008"},{"key":"4534_CR41","unstructured":"Ivanov, A.V., et al.: Speed vs. accuracy: Designing an optimal asr system for spontaneous non-native speech in a real-time application, Proc. of the IWSDS, Saariselk, Finland, (2016)"},{"key":"4534_CR42","doi-asserted-by":"crossref","unstructured":"Defossez, A., Synnaeve, G., Adi, Y.: Real time speech enhancement in the waveform domain, arXiv preprint arXiv:.12847, 2020. (2006)","DOI":"10.21437\/Interspeech.2020-2409"},{"key":"4534_CR43","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/TASLP.2019.2955276","volume":"28","author":"K Tan","year":"2019","unstructured":"Tan, K., Wang, D.: Learning complex spectral mapping with gated convolutional recurrent networks for monaural speech enhancement. IEEE\/ACM Trans. Audio Speech Lang. Process. 28, 380\u2013390 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4534_CR44","doi-asserted-by":"crossref","unstructured":"Zhao, S., Ma, B., Watcharasupat, K.N., Gan, W.-S.: FRCRN: Boosting feature representation using frequency recurrence for monaural speech enhancement, in ICASSP 2022\u20132022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP),: IEEE, pp. 9281\u20139285. (2022)","DOI":"10.1109\/ICASSP43922.2022.9747578"},{"key":"4534_CR45","doi-asserted-by":"crossref","unstructured":"Luo, Y., Mesgarani, N.: Conv-tasnet: Surpassing ideal time\u2013frequency magnitude masking for speech separation, IEEE\/ACM transactions on audio, speech, and language processing, vol. 27, no. 8, pp. 1256\u20131266, (2019)","DOI":"10.1109\/TASLP.2019.2915167"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04534-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04534-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04534-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T13:16:07Z","timestamp":1758546967000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04534-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,5]]},"references-count":45,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4534"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04534-5","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,5]]},"assertion":[{"value":"27 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 June 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 July 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 September 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"990"}}