{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T01:11:52Z","timestamp":1754183512719,"version":"3.41.2"},"reference-count":32,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"8","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Fundamentals"],"published-print":{"date-parts":[[2025,8,1]]},"DOI":"10.1587\/transfun.2024eap1173","type":"journal-article","created":{"date-parts":[[2025,1,30]],"date-time":"2025-01-30T17:13:22Z","timestamp":1738257202000},"page":"996-1004","source":"Crossref","is-referenced-by-count":0,"title":["Monaural Speech Enhancement with Full-Convolution Attention Module and Post-Processing Strategy"],"prefix":"10.1587","volume":"E108.A","author":[{"given":"Lin","family":"ZHOU","sequence":"first","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanxiang","family":"CAO","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qirui","family":"WANG","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunling","family":"CHENG","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenghao","family":"ZHUANG","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuxi","family":"DENG","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] J. Abdulbaqi, Y. Gu, SH. Chen, and I. Marsic, \u201cResidual recurrent neural network for speech enhancement,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.6659-6663, 2020. 10.1109\/ICASSP40776.2020.9053544","DOI":"10.1109\/ICASSP40776.2020.9053544"},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] A. Pandey and D.L. Wang, \u201cTCNN: Temporal convolutional neural network for real-time speech enhancement in the time domain,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.6875-6879, 2019. 10.1109\/icassp.2019.8683634","DOI":"10.1109\/ICASSP.2019.8683634"},{"key":"3","doi-asserted-by":"crossref","unstructured":"[3] S. Pascual, A. Bonafonte, and J. Serra, \u201cSEGAN: Speech enhancement generative adversarial network,\u201d 18th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.3642-3646, 2017. 10.21437\/interspeech.2017-1428","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] D. Rethage, J. Pons, and X. Serra, \u201cA wavenet for speech denoising,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.5069-5073, 2018. 10.1109\/icassp.2018.8462417","DOI":"10.1109\/ICASSP.2018.8462417"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] Y. Zhao, D.L. Wang, I. Merks, and T. Zhang, \u201cDNN-based enhancement of noisy and reverberant speech,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.6525-6529, 2016. 10.1109\/icassp.2016.7472934","DOI":"10.1109\/ICASSP.2016.7472934"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] K. Tan and D.L. Wang, \u201cA convolutional recurrent neural network for real-time speech enhancement,\u201d 19th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.3229-3233, 2018. 10.21437\/interspeech.2018-1405","DOI":"10.21437\/Interspeech.2018-1405"},{"key":"7","doi-asserted-by":"publisher","unstructured":"[7] C. Zheng, X.L. Peng, Y. Zhang, S. Srinivasan, and Y. Lu, \u201cInteractive speech and noise modeling for speech enhancement,\u201d 35th AAAI Conference on Artificial Intelligence, Electronic Network, vol.35, no.16, pp.14549-14557, 2021. 10.1609\/aaai.v35i16.17710","DOI":"10.1609\/aaai.v35i16.17710"},{"key":"8","doi-asserted-by":"publisher","unstructured":"[8] Y. Xu, J. Du, L.R. Dai, and C.H. Lee, \u201cA regression approach to speech enhancement based on deep neural networks,\u201d IEEE\/ACM Trans. Audio, Speech, Language Process., vol.23, no.1, pp.7-19, 2015. 10.1109\/taslp.2014.2364452","DOI":"10.1109\/TASLP.2014.2364452"},{"key":"9","doi-asserted-by":"publisher","unstructured":"[9] L. Zhang, G.Z. Bao, J. Zhang, and Z.F. Ye, \u201cSupervised single-channel speech enhancement using ratio mask with joint dictionary learning,\u201d Speech Communication, vol.82, pp.38-52, 2016. 10.1016\/j.specom.2016.06.001","DOI":"10.1016\/j.specom.2016.06.001"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] Y.H. Tu, J. Du, and C.H. Lee, \u201cDNN training based on classic gain function for single-channel speech enhancement and recognition,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.910-914, 2019. 10.1109\/icassp.2019.8682195","DOI":"10.1109\/ICASSP.2019.8682195"},{"key":"11","unstructured":"[11] H.S. Choi, J.H. Kim, J. Huh, A. Kim, J.W. Ha, and K. Lee, \u201cPhase-aware speech enhancement with deep complex U-NET,\u201d International Conference on Learning Representations (ICLR), pp.1-20, 2019."},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] K. Tan and D.L. Wang, \u201cComplex spectral mapping with a convolutional recurrent network for monaural speech enhancement,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.6865-6869, 2019. 10.1109\/icassp.2019.8682834","DOI":"10.1109\/ICASSP.2019.8682834"},{"key":"13","doi-asserted-by":"publisher","unstructured":"[13] K. Tan and D.L. Wang, \u201cLearning complex spectral mapping with gated convolutional recurrent networks for monaural speech enhancement,\u201d IEEE\/ACM Trans. Audio, Speech, Language Process., vol.28, pp.380-390, 2020. 10.1109\/taslp.2019.2955276","DOI":"10.1109\/TASLP.2019.2955276"},{"key":"14","doi-asserted-by":"publisher","unstructured":"[14] Y.X. Wang, A. Narayanan, and D.L. Wang, \u201cOn training targets for supervised speech separation,\u201d IEEE\/ACM Trans. Audio, Speech, Language Process., vol.22, no.12, pp.1849-1858, 2014. 10.1109\/taslp.2014.2352935","DOI":"10.1109\/TASLP.2014.2352935"},{"key":"15","unstructured":"[15] S.B. Lv, Y.X. Hu, S.M. Zhang, and L. Xie, \u201cDCCRN+: Channel-wise subband DCCRN with SNR estimation for speech enhancement,\u201d 22th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.2816-2820, 2021. 10.21437\/interspeech.2021-1482"},{"key":"16","doi-asserted-by":"publisher","unstructured":"[16] D.S. Williamson, Y.X. Wang, and D.L. Wang, \u201cComplex ratio masking for monaural speech separation,\u201d IEEE\/ACM Trans. Audio, Speech, Language Process., vol.24, no.3, pp.483-492, 2016. 10.1109\/taslp.2015.2512042","DOI":"10.1109\/TASLP.2015.2512042"},{"key":"17","unstructured":"[17] Y.X. Hu, Y. Liu, S.B. Lv, M.T. Xing, S.M. Zhang, Y.H. Fu, J. Wu, B.H. Zhang, and L. Xie, \u201cDCCRN: Deep complex convolution recurrent network for phase-aware speech enhancement,\u201d 21th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.2472-2476, 2020. 10.21437\/interspeech.2020-2537"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] Y.H. Sun, L.J. Yang, H.F. Zhu, and J. Han, \u201cFunnel deep complex u-net for phase-aware speech enhancement,\u201d 22th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.161-165, 2021. 10.21437\/interspeech.2021-10","DOI":"10.21437\/Interspeech.2021-10"},{"key":"19","doi-asserted-by":"publisher","unstructured":"[19] D.C. Yin, C. Luo, Z.W. Xiong, and W.J. Zeng, \u201cPHASEN: A phase-and-harmonics-aware speech enhancement network,\u201d 34th AAAI Conference on Artificial Intelligence (AAAI), pp.9459-9465, 2020. 10.1609\/aaai.v34i05.6489","DOI":"10.1609\/aaai.v34i05.6489"},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] Y. Luo, Z. Ghen, and T. Yoshioka, \u201cDual-path RNN: Efficient long sequence modeling for time-domain single-channel speech separation,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.46-50, 2020. 10.1109\/icassp40776.2020.9054266","DOI":"10.1109\/ICASSP40776.2020.9054266"},{"key":"21","unstructured":"[21] X.H. Le, H.S. Chen, K. Chen, and J. Lu, \u201cDPCRN: Dual-path convolution recurrent network for single channel speech enhancement,\u201d 22th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.2811-2815, 2021. 10.21437\/interspeech.2021-296"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] F. Dang, H. Chen, and P. Zhang, \u201cDPT-FSNet: Dual-path transformer based full-band and sub-band fusion network for speech enhancement,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.6857-6861, 2022. 10.1109\/icassp43922.2022.9746171","DOI":"10.1109\/ICASSP43922.2022.9746171"},{"key":"23","doi-asserted-by":"publisher","unstructured":"[23] J. Hu, L. Shen, S. Albanie, G. Sun, and E.H. Wu, \u201cSqueeze-and-excitation networks,\u201d IEEE Trans Pattern Anal. Mach. Intell., vol.42, no.8, pp.2011-2023, 2020. 10.1109\/tpami.2019.2913372","DOI":"10.1109\/TPAMI.2019.2913372"},{"key":"24","doi-asserted-by":"crossref","unstructured":"[24] Q.Q. Zhang, Q. Song, A. Nicolson, T. Lan, and H.Z. Li, \u201cTemporal convolutional network with frequency dimension adaptive attention for speech enhancement,\u201d 22th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.166-170, 2021. 10.21437\/interspeech.2021-46","DOI":"10.21437\/Interspeech.2021-46"},{"key":"25","doi-asserted-by":"crossref","unstructured":"[25] Q.Q. Zhang, Q. Song, Z.H. Ni, A. Nicolson, and H.Z. Li, \u201cTime-frequency attention for monaural speech enhancement,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.7852-7856, 2022. 10.1109\/icassp43922.2022.9746454","DOI":"10.1109\/ICASSP43922.2022.9746454"},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] S.K. Zhao, T.H. Nguyen, and B. Ma, \u201cMonaural speech enhancement with complex convolutional block attention module and joint time frequency losses,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.6648-6652, 2021. 10.1109\/icassp39728.2021.9414569","DOI":"10.1109\/ICASSP39728.2021.9414569"},{"key":"27","doi-asserted-by":"crossref","unstructured":"[27] C. Valentini-Botinhao, W. Xin, S. Takaki, and J. Yamagishi, \u201cInvestigating RNN-based speech enhancement methods for noise-robust Text-to-Speech,\u201d 9th ISCA Speech Synthesis Workshop, pp.160-165, 2016. 10.21437\/ssw.2016-24","DOI":"10.21437\/SSW.2016-24"},{"key":"28","doi-asserted-by":"crossref","unstructured":"[28] A.W. Rix, J.G. Beerends, M.P. Hollier, and A.P. Hekstra, \u201cPerceptual evaluation of speech quality (PESQ): A new method for speech quality assessment of telephone networks and codecs,\u201d IEEE International Conference On Acoustics, Speech, And Signal Processing (ICASSP), pp.749-752, 2001. 10.1109\/icassp.2001.941023","DOI":"10.1109\/ICASSP.2001.941023"},{"key":"29","doi-asserted-by":"publisher","unstructured":"[29] Y. Hu and P.C. Loizou, \u201cEvaluation of objective quality measures for speech enhancement,\u201d IEEE Trans. Audio, Speech, Language Process., vol.16, no.1, pp.229-238, 2008. 10.1109\/tasl.2007.911054","DOI":"10.1109\/TASL.2007.911054"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] A. D\u00e9fossez, G. Synnaeve, and Y. Adi, \u201cReal time speech enhancement in the waveform domain,\u201d 21th Annual Conference of The International Speech Communication Association (INTERSPEECH), pp.3291-3295, 2020. 10.21437\/interspeech.2020-2409","DOI":"10.21437\/Interspeech.2020-2409"},{"key":"31","doi-asserted-by":"publisher","unstructured":"[31] X.X. Xiang, X.J. Zhang, and H.Z. Chen, \u201cA nested u-net with self-attention and dense connectivity for monaural speech enhancement,\u201d IEEE Signal Process. Lett., vol.29, pp.105-109, 2022. 10.1109\/lsp.2021.3128374","DOI":"10.1109\/LSP.2021.3128374"},{"key":"32","doi-asserted-by":"crossref","unstructured":"[32] Y. Zhang, H. Zou, and J. Zhu, \u201cMagnitude-and-phase-aware speech enhancement with parallel sequence modeling,\u201d IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp.1-8, 2023. 10.1109\/asru57964.2023.10389726","DOI":"10.1109\/ASRU57964.2023.10389726"}],"container-title":["IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E108.A\/8\/E108.A_2024EAP1173\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T03:28:20Z","timestamp":1754105300000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E108.A\/8\/E108.A_2024EAP1173\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,1]]},"references-count":32,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.1587\/transfun.2024eap1173","relation":{},"ISSN":["0916-8508","1745-1337"],"issn-type":[{"type":"print","value":"0916-8508"},{"type":"electronic","value":"1745-1337"}],"subject":[],"published":{"date-parts":[[2025,8,1]]},"article-number":"2024EAP1173"}}