{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T11:46:08Z","timestamp":1762429568413},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s10772-023-10044-x","type":"journal-article","created":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T15:01:41Z","timestamp":1696604501000},"page":"735-742","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Speech signal analysis and enhancement using combined wavelet Fourier transform with stacked deep learning architecture"],"prefix":"10.1007","volume":"26","author":[{"given":"V.","family":"Srinivasarao","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,6]]},"reference":[{"key":"10044_CR1","doi-asserted-by":"crossref","unstructured":"Abdulbaqi, J., Gu, Y., & Marsic, I. (2019). RHR-Net: A residual hourglass recurrent neural network for speech enhancement. arXiv preprint arXiv:1904.07294","DOI":"10.1109\/ICASSP40776.2020.9053544"},{"issue":"02","key":"10044_CR2","doi-asserted-by":"publisher","first-page":"52","DOI":"10.38094\/jastt20291","volume":"2","author":"SMSA Abdullah","year":"2021","unstructured":"Abdullah, S. M. S. A., Ameen, S. Y. A., Sadeeq, M. A., & Zeebaree, S. (2021). Multimodal emotion recognition using deep learning. Journal of Applied Science and Technology Trends, 2(02), 52\u201358.","journal-title":"Journal of Applied Science and Technology Trends"},{"key":"10044_CR3","doi-asserted-by":"publisher","first-page":"130657","DOI":"10.1109\/ACCESS.2022.3228744","volume":"10","author":"S Abdullah","year":"2022","unstructured":"Abdullah, S., Zamani, M., & Demosthenous, A. (2022). A compact CNN-based speech enhancement with adaptive filter design using gabor function and region-aware convolution. IEEE Access, 10, 130657.","journal-title":"IEEE Access"},{"key":"10044_CR4","doi-asserted-by":"publisher","first-page":"2993","DOI":"10.1109\/TASLP.2022.3207349","volume":"30","author":"X Bie","year":"2022","unstructured":"Bie, X., Leglaive, S., Alameda-Pineda, X., & Girin, L. (2022). Unsupervised speech enhancement using dynamical variationalautoencoders. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 30, 2993\u20133007.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10044_CR5","doi-asserted-by":"crossref","unstructured":"Braithwaite, D. T., & Kleijn, W. B. (2019). Speech enhancement with variance constrained autoencoders. In Interspeech (pp. 1831\u20131835).","DOI":"10.21437\/Interspeech.2019-1809"},{"issue":"2","key":"10044_CR6","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1016\/j.bbe.2022.03.002","volume":"42","author":"M Chaiani","year":"2022","unstructured":"Chaiani, M., Selouani, S. A., Boudraa, M., & Yakoub, M. S. (2022). Voice disorder classification using speech enhancement and deep learning models. Biocybernetics and Biomedical Engineering, 42(2), 463\u2013480.","journal-title":"Biocybernetics and Biomedical Engineering"},{"issue":"6","key":"10044_CR7","doi-asserted-by":"publisher","first-page":"102","DOI":"10.3390\/computation10060102","volume":"10","author":"M Guti\u00e9rrez-Mu\u00f1oz","year":"2022","unstructured":"Guti\u00e9rrez-Mu\u00f1oz, M., & Coto-Jim\u00e9nez, M. (2022). An experimental study on speech enhancement based on a combination of wavelets and deep learning. Computation, 10(6), 102.","journal-title":"Computation"},{"key":"10044_CR8","doi-asserted-by":"crossref","unstructured":"Han, C., Luo, Y., & Mesgarani, N. (2020). Real-time binaural speech separation with preserved spatial cues. In Proceedings of IEEE international conference on acoustics, speech, signal processing (ICASSP) (pp. 6404\u20136408). IEEE.","DOI":"10.1109\/ICASSP40776.2020.9053215"},{"key":"10044_CR9","doi-asserted-by":"crossref","unstructured":"Huang, Z., Watanabe, S., Yang, S. W., Garc\u00eda, P., & Khudanpur, S. (2022). Investigating self-supervised learning for speech enhancement and separation. In ICASSP 2022\u20132022 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 6837\u20136841). IEEE.","DOI":"10.1109\/ICASSP43922.2022.9746303"},{"key":"10044_CR10","doi-asserted-by":"publisher","first-page":"137584","DOI":"10.1109\/ACCESS.2021.3118211","volume":"9","author":"JW Hwang","year":"2021","unstructured":"Hwang, J. W., Park, R. H., & Park, H. M. (2021). Efficient audio-visual speech enhancement using deep U-Net with early fusion of audio and video information and RNN attention blocks. IEEE Access, 9, 137584\u2013137598.","journal-title":"IEEE Access"},{"key":"10044_CR11","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/j.isprsjprs.2018.11.014","volume":"147","author":"S Jabari","year":"2019","unstructured":"Jabari, S., Rezaee, M., Fathollahi, F., & Zhang, Y. (2019). Multispectral change detection using multivariate Kullback-Leibler distance. ISPRS Journal of Photogrammetry and Remote Sensing, 147, 163\u2013177.","journal-title":"ISPRS Journal of Photogrammetry and Remote Sensing"},{"issue":"3","key":"10044_CR12","doi-asserted-by":"publisher","first-page":"1959","DOI":"10.1007\/s11277-021-08313-6","volume":"119","author":"A Karthik","year":"2021","unstructured":"Karthik, A., & MazherIqbal, J. L. (2021). Efficient speech enhancement using recurrent convolution encoder and decoder. Wireless Personal Communications, 119(3), 1959\u20131973.","journal-title":"Wireless Personal Communications"},{"key":"10044_CR13","doi-asserted-by":"publisher","first-page":"1898","DOI":"10.1109\/LSP.2022.3200581","volume":"29","author":"H Kim","year":"2022","unstructured":"Kim, H., Kang, K., & Shin, J. W. (2022). Factorized MVDR deep beamforming for multi-channel speech enhancement. IEEE Signal Processing Letters, 29, 1898\u20131902.","journal-title":"IEEE Signal Processing Letters"},{"key":"10044_CR14","doi-asserted-by":"publisher","first-page":"103109","DOI":"10.1016\/j.dsp.2021.103109","volume":"116","author":"H Kim","year":"2021","unstructured":"Kim, H., & Shin, J. W. (2021). Target exaggeration for deep learning-based speech enhancement. Digital Signal Processing, 116, 103109.","journal-title":"Digital Signal Processing"},{"key":"10044_CR15","unstructured":"Kominek, J., & Black, A. W. (2004). The CMU Arctic speech databases. In Proceedings of the fifth ISCA workshop on speech synthesis, Vienna, Austria, 20\u201322 September 2004."},{"key":"10044_CR16","doi-asserted-by":"publisher","first-page":"2411","DOI":"10.1109\/TASLP.2022.3190738","volume":"30","author":"X Le","year":"2022","unstructured":"Le, X., Lei, T., Chen, K., & Lu, J. (2022). Inference skipping for more efficient real-time speech enhancement with parallel RNNs. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 30, 2411\u20132421.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10044_CR17","doi-asserted-by":"publisher","first-page":"108499","DOI":"10.1016\/j.apacoust.2021.108499","volume":"187","author":"A Li","year":"2022","unstructured":"Li, A., Zheng, C., Zhang, L., & Li, X. (2022). Glance and gaze: A collaborative learning framework for single-channel speech enhancement. Applied Acoustics, 187, 108499.","journal-title":"Applied Acoustics"},{"issue":"18","key":"10044_CR18","doi-asserted-by":"publisher","first-page":"6825","DOI":"10.3390\/s22186825","volume":"22","author":"XX Li","year":"2022","unstructured":"Li, X. X., Li, D., Ren, W. X., & Zhang, J. S. (2022). Loosening identification of multi-bolt connections based on wavelet transform and ResNet-50 convolutional neural network. Sensors, 22(18), 6825.","journal-title":"Sensors"},{"key":"10044_CR19","doi-asserted-by":"publisher","first-page":"3440","DOI":"10.1109\/TASLP.2021.3125143","volume":"29","author":"J Lin","year":"2021","unstructured":"Lin, J., van Wijngaarden, A. J. D. L., Wang, K. C., & Smith, M. C. (2021b). Speech enhancement using multi-stage self-attentive temporal convolutional networks. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 29, 3440\u20133450.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10044_CR20","doi-asserted-by":"publisher","first-page":"1016","DOI":"10.1109\/TASLP.2021.3133209","volume":"30","author":"YC Lin","year":"2021","unstructured":"Lin, Y. C., Yu, C., Hsu, Y. T., Fu, S. W., Tsao, Y., & Kuo, T. W. (2021a). SEOFP-NET: Compression and acceleration of deep neural networks for speech enhancement using sign-exponent-only floating-points. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 30, 1016\u20131031.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"10044_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-020-00191-3","volume":"2021","author":"J Llombart","year":"2021","unstructured":"Llombart, J., Ribas, D., Miguel, A., Vicente, L., Ortega, A., & Lleida, E. (2021). Progressive loss functions for speech enhancement with deep neural networks. EURASIP Journal on Audio, Speech, and Music Processing, 2021(1), 1\u201316.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"10044_CR22","doi-asserted-by":"publisher","first-page":"1368","DOI":"10.1109\/TASLP.2021.3066303","volume":"29","author":"D Michelsanti","year":"2021","unstructured":"Michelsanti, D., Tan, Z. H., Zhang, S. X., Xu, Y., Yu, M., Yu, D., & Jensen, J. (2021). An overview of deep-learning-based audio-visual speech enhancement and separation. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 29, 1368\u20131396.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10044_CR23","unstructured":"Passos, L. A., Khubaib, A., Raza, M., & Adeel, A. (2022). Multimodal speech enhancement using burst propagation. arXiv preprint arXiv:2209.03275."},{"key":"10044_CR24","doi-asserted-by":"crossref","unstructured":"Polyak, A., Wolf, L., Adi, Y., Kabeli, O., & Taigman, Y. (2021). High fidelity speech regeneration with application to speech enhancement. In Proceedings of IEEE international conference on acoustics, speech, and signal processing (ICASSP) (pp. 7143\u20137147). IEEE.","DOI":"10.1109\/ICASSP39728.2021.9414853"},{"key":"10044_CR25","doi-asserted-by":"crossref","unstructured":"Rao, W., Fu, Y., Hu, Y., Xu, X., Jv, Y., Han, J., Jiang, Xie, L., Wang, Y., Watanabe, S., et al. (2021). Interspeech 2021 conferencing speech challenge: Towards far-field multi-channel speech enhancement for video conferencing. arXiv:2104.00960.","DOI":"10.1109\/ASRU51503.2021.9688126"},{"key":"10044_CR26","doi-asserted-by":"crossref","unstructured":"Reddy, C. K., Dubey, H., Koishida, K., Nair, A., Gopal, V., Cutler, R., Braun, S., Gamper, H., Aichner, R., & Srinivasan, S. (2021). Interspeech 2021 deep noise suppression challenge. In Interspeech.","DOI":"10.21437\/Interspeech.2021-1609"},{"issue":"18","key":"10044_CR27","doi-asserted-by":"publisher","first-page":"9000","DOI":"10.3390\/app12189000","volume":"12","author":"D Ribas","year":"2022","unstructured":"Ribas, D., Miguel, A., Ortega, A., & Lleida, E. (2022). Wiener filter and deep neural networks: A well-balanced pair for speech enhancement. Applied Sciences, 12(18), 9000.","journal-title":"Applied Sciences"},{"key":"10044_CR28","doi-asserted-by":"crossref","unstructured":"Sun, K., & Zhang, X. (2021). UltraSE: Single-channel speech enhancement using ultrasound. In Proceedings of the 27th annual international conference on mobile computing and networking\u00a0(pp. 160\u2013173).","DOI":"10.1145\/3447993.3448626"},{"key":"10044_CR29","doi-asserted-by":"crossref","unstructured":"Toloosham, B., & Koishida, K. (2022). A training framework for stereo-aware speech enhancement using deep neural networks. In\u00a0ICASSP 2022\u20132022 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 6962\u20136966). IEEE.","DOI":"10.1109\/ICASSP43922.2022.9746070"},{"key":"10044_CR30","doi-asserted-by":"crossref","unstructured":"Tolooshams, B., Giri, R., Song, A. H., Isik, U., & Krishnaswamy, A. (2020). Channel-attention dense u-net for multichannel speech enhancement. In Proceedings of IEEE international conference on acoustics, speech, and signal processing (ICASSP) (pp. 836\u2013840). IEEE.","DOI":"10.1109\/ICASSP40776.2020.9053989"},{"key":"10044_CR31","doi-asserted-by":"publisher","first-page":"1443","DOI":"10.1177\/1475921720918378","volume":"20","author":"R Wang","year":"2021","unstructured":"Wang, R., Chencho, An, S., Li, J., Li, L., Hao, H., & Liu, W. (2021). Deep residual network framework for structural health monitoring. Structural Health Monitoring, 20, 1443\u20131461.","journal-title":"Structural Health Monitoring"},{"issue":"1","key":"10044_CR32","doi-asserted-by":"publisher","first-page":"19","DOI":"10.14203\/jet.v21.19-26","volume":"21","author":"AR Yuliani","year":"2021","unstructured":"Yuliani, A. R., Amri, M. F., Suryawati, E., Ramdan, A., & Pardede, H. F. (2021). Speech enhancement using deep learning methods: A review. Jurnal Elektronikadan Telekomunikasi, 21(1), 19\u201326.","journal-title":"Jurnal Elektronikadan Telekomunikasi"},{"key":"10044_CR33","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1109\/TASLP.2022.3225649","volume":"31","author":"Q Zhang","year":"2022","unstructured":"Zhang, Q., Qian, X., Ni, Z., Nicolson, A., Ambikairajah, E., & Li, H. (2022). A time-frequency attention module for neural speech enhancement. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 31, 462.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"5","key":"10044_CR34","doi-asserted-by":"publisher","first-page":"3291","DOI":"10.1121\/10.0011396","volume":"151","author":"C Zheng","year":"2022","unstructured":"Zheng, C., Liu, W., Li, A., Ke, Y., & Li, X. (2022). Low-latency monaural speech enhancement with deep filter-bank equalizer. The Journal of the Acoustical Society of America, 151(5), 3291\u20133304.","journal-title":"The Journal of the Acoustical Society of America"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10044-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-023-10044-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10044-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T14:11:38Z","timestamp":1699625498000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-023-10044-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9]]},"references-count":34,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["10044"],"URL":"https:\/\/doi.org\/10.1007\/s10772-023-10044-x","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9]]},"assertion":[{"value":"13 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}