{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T11:40:10Z","timestamp":1690803610114},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,5,25]],"date-time":"2022-05-25T00:00:00Z","timestamp":1653436800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,5,25]],"date-time":"2022-05-25T00:00:00Z","timestamp":1653436800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s10772-022-09972-x","type":"journal-article","created":{"date-parts":[[2022,5,25]],"date-time":"2022-05-25T11:02:57Z","timestamp":1653476577000},"page":"355-370","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SHO based Deep Residual network and hierarchical speech features for speech enhancement"],"prefix":"10.1007","volume":"26","author":[{"given":"Manju Ramrao","family":"Bhosle","sequence":"first","affiliation":[]},{"given":"Nagesh Kallollu","family":"Narayaswamy","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,25]]},"reference":[{"key":"9972_CR1","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/j.inffus.2019.08.008","volume":"59","author":"A Adeel","year":"2020","unstructured":"Adeel, A., Gogate, M., & Hussain, A. (2020). Contextual deep learning-based audio\u2013visual switching for speech enhancement in real-world environments. Information Fusion, 59, 163\u2013170.","journal-title":"Information Fusion"},{"issue":"1","key":"9972_CR2","first-page":"9","volume":"2","author":"JS Anita","year":"2019","unstructured":"Anita, J. S., & Abinaya, J. S. (2019). Impact of supervised classifier on speech emotion recognition. Multimedia Research, 2(1), 9\u201316.","journal-title":"Multimedia Research"},{"key":"9972_CR3","doi-asserted-by":"crossref","unstructured":"Asl, L. B., & Nezhad, V. M. (2010). Speech enhancement using particle swarm optimization techniques. In Proceedings of international conference on measuring technology and mechatronics automation (pp. 441\u2013444).","DOI":"10.1109\/ICMTMA.2010.510"},{"key":"9972_CR4","doi-asserted-by":"crossref","unstructured":"Bando, Y., Mimura, M., Itoyama, K., Yoshii, K., & Kawahara, T. (2018). Statistical speech enhancement based on probabilistic integration of variational autoencoder and non-negative matrix factorization. In Proceedings of IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 716\u2013720).","DOI":"10.1109\/ICASSP.2018.8461530"},{"issue":"10","key":"9972_CR5","doi-asserted-by":"publisher","first-page":"491","DOI":"10.3390\/medicina56100491","volume":"56","author":"M Caliendo","year":"2020","unstructured":"Caliendo, M., Lanzara, V., Vetri, L., Roccella, M., Marotta, R., Carotenuto, M., Russo, D., Cerroni, F., & Precenzano, F. (2020). Emotional\u2013behavioral disorders in healthy siblings of children with neurodevelopmental disorders. Medicina, 56(10), 491.","journal-title":"Medicina"},{"issue":"5","key":"9972_CR7","doi-asserted-by":"publisher","first-page":"2604","DOI":"10.1121\/1.4948445","volume":"139","author":"J Chen","year":"2016","unstructured":"Chen, J., Wang, Y., Yoho, S. E., Wang, D., & Healy, E. W. (2016). Large-scale training to increase speech intelligibility for hearing-impaired listeners in novel noises. The Journal of the Acoustical Society of America, 139(5), 2604\u20132612.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"4","key":"9972_CR6","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen, L. C., Papandreou, G., Kokkinos, I., Murphy, K., & Yuille, A. L. (2017). Semantic image segmentation with deep convolutional nets and fully connected. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(4), 834\u2013848.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"4","key":"9972_CR12","doi-asserted-by":"publisher","first-page":"1556","DOI":"10.3390\/ijerph18041556","volume":"18","author":"G D\u2019Addazio","year":"2021","unstructured":"D\u2019Addazio, G., Santilli, M., Sinjari, B., Xhajanka, E., Rexhepi, I., Mangifesta, R., & Caputi, S. (2021). Access to dental care\u2014A survey from dentists, people with disabilities and caregivers. International Journal of Environmental Research and Public Health, 18(4), 1556.","journal-title":"International Journal of Environmental Research and Public Health"},{"issue":"4","key":"9972_CR8","first-page":"12","volume":"2","author":"RV Darekar","year":"2019","unstructured":"Darekar, R. V., & Dhande, A. P. (2019). Emotion recognition from speech signals using DCNN with hybrid GA-GWO algorithm. Multimedia Research, 2(4), 12\u201322.","journal-title":"Multimedia Research"},{"key":"9972_CR9","unstructured":"Dauphin, Y. N., Fan, A., Auli, M., & Grangier, D. (2017). Language modeling with gated convolutional networks. In Proceeding of international conference on machine learning (pp. 933\u2013941)."},{"issue":"11","key":"9972_CR10","doi-asserted-by":"publisher","first-page":"3734","DOI":"10.35940\/ijitee.K1999.0981119","volume":"8","author":"SB Dhonde","year":"2019","unstructured":"Dhonde, S. B., Chaudhari, A. A., & Gajare, M. P. (2019). Performance evaluation of Mel and bark scale based features for text-independent speaker identification. International Journal of Innovative Technology and Exploring Engineering, 8(11), 3734\u20133738.","journal-title":"International Journal of Innovative Technology and Exploring Engineering"},{"key":"9972_CR11","doi-asserted-by":"crossref","unstructured":"Donahue, C., Li, B., & Prabhavalkar, R. (2018). Exploring speech enhancement with generative adversarial networks for robust speech recognition. In Proceedings of IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 5024\u20135028).","DOI":"10.1109\/ICASSP.2018.8462581"},{"key":"9972_CR13","doi-asserted-by":"crossref","unstructured":"El-Solh, A., Cuhadar, A., & Goubran, R. A. (2007). Evaluation of speech enhancement techniques for speaker identification in noisy environments. In Proceedings of ninth IEEE international symposium on multimedia workshops (ISMW 2007) (pp. 235\u2013239).","DOI":"10.1109\/ISM.Workshops.2007.47"},{"issue":"6","key":"9972_CR14","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Ephraim, Y., & Malah, D. (1984). Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator. IEEE Transactions on Acoustics, Speech, and Signal Processing, 32(6), 1109\u20131121.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9972_CR15","doi-asserted-by":"crossref","unstructured":"Erdogan, H., Hershey, J. R., Watanabe, S., & Le Roux, J. (2015). Phase-sensitive and recognition-boosted speech separation using deep recurrent neural networks. In Proceedings of IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 708\u2013712).","DOI":"10.1109\/ICASSP.2015.7178061"},{"key":"9972_CR17","unstructured":"Fu, S. W., Liao, C. F., Tsao, Y., & Lin, S. D. (2019). MetricGAN: Generative adversarial networks based black-box metric scores optimization for speech enhancement. In Proceeding of 36th international conference on machine learning, ICML, 97 (pp. 2031\u20132041)."},{"key":"9972_CR18","doi-asserted-by":"crossref","unstructured":"Fu, S. W., Tsao, Y., & Lu, X. (2016). SNR-aware convolutional neural network modeling for speech enhancement. In INTERSPEECH (pp. 3768\u20133772).","DOI":"10.21437\/Interspeech.2016-211"},{"issue":"9","key":"9972_CR16","doi-asserted-by":"publisher","first-page":"1570","DOI":"10.1109\/TASLP.2018.2821903","volume":"26","author":"SW Fu","year":"2018","unstructured":"Fu, S. W., Wang, T. W., Tsao, Y., Lu, X., & Kawai, H. (2018). End-to-end waveform utterance enhancement for direct evaluation metrics optimization by fully convolutional neural networks. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 26(9), 1570\u20131584.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"2","key":"9972_CR19","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1177\/003754970107600201","volume":"76","author":"ZW Geem","year":"2001","unstructured":"Geem, Z. W., Kim, J. H., & Loganathan, G. V. (2001). A new heuristic optimization algorithm: Harmony search. SIMULATION, 76(2), 60\u201368.","journal-title":"SIMULATION"},{"issue":"4","key":"9972_CR20","doi-asserted-by":"publisher","first-page":"1383","DOI":"10.1109\/TASL.2011.2180896","volume":"20","author":"T Gerkmann","year":"2011","unstructured":"Gerkmann, T., & Hendriks, R. C. (2011). Unbiased MMSE-based noise power estimation with low complexity and low tracking delay. IEEE Transactions on Audio, Speech, and Language Processing, 20(4), 1383\u20131393.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"17","key":"9972_CR21","doi-asserted-by":"publisher","first-page":"6056","DOI":"10.3390\/ijms21176056","volume":"21","author":"B Ghinassi","year":"2020","unstructured":"Ghinassi, B., Baldassarre, A., D\u2019Addazio, G. D., Traini, T., Andrisani, M., Vincenzo, G. D., Gaggi, G., Piattelli, M., Caputi, S., & Sinjari, B. (2020). Gingival response to dental implant: Comparison study on the effects of new nanopored laser-treated vs traditional healing abutments. International Journal of Molecular Sciences, 21(17), 6056.","journal-title":"International Journal of Molecular Sciences"},{"issue":"7\u20138","key":"9972_CR22","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1080\/01969722.2018.1448241","volume":"49","author":"AV Haridas","year":"2018","unstructured":"Haridas, A. V., Marimuthu, R., & Chakraborty, B. (2018). A novel approach to improve the speech intelligibility using fractional delta-amplitude modulation spectrogram. Cybernetics and Systems, 49(7\u20138), 421\u2013451.","journal-title":"Cybernetics and Systems"},{"issue":"2","key":"9972_CR23","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/TETCI.2017.2784878","volume":"2","author":"JC Hou","year":"2018","unstructured":"Hou, J. C., Wang, S. S., Lai, Y. H., Tsao, Y., Chang, H. W., & Wang, H. M. (2018). Audio\u2013visual speech enhancement using multimodal deep convolutional neural networks. IEEE Transactions on Emerging Topics in Computational Intelligence, 2(2), 117\u2013128.","journal-title":"IEEE Transactions on Emerging Topics in Computational Intelligence"},{"key":"9972_CR24","doi-asserted-by":"crossref","unstructured":"Hu, Y., Liu, Y., Lv, S., Xing, M., Zhang, S., Fu, Y., Wu, J., Zhang, B., & Xie, L. (2020). DCCRN: Deep complex convolution recurrent network for phase-aware speech enhancement. Electrical engineering and systems science.","DOI":"10.21437\/Interspeech.2020-2537"},{"issue":"78","key":"9972_CR25","doi-asserted-by":"publisher","first-page":"2357","DOI":"10.1108\/EC-10-2019-0481","volume":"37","author":"A Kaveh","year":"2020","unstructured":"Kaveh, A., & Zaerreza, A. (2020). Shuffled shepherd optimization method: A new meta-heuristic algorithm. Engineering Computations, 37(78), 2357\u20132389.","journal-title":"Engineering Computations"},{"issue":"7","key":"9972_CR26","doi-asserted-by":"publisher","first-page":"1568","DOI":"10.1109\/TBME.2016.2613960","volume":"64","author":"YH Lai","year":"2016","unstructured":"Lai, Y. H., Chen, F., Wang, S. S., Lu, X., Tsao, Y., & Lee, C. H. (2016). A deep denoising autoencoder approach to improving the intelligibility of vocoded speech in cochlear implant simulation. IEEE Transactions on Biomedical Engineering, 64(7), 1568\u20131578.","journal-title":"IEEE Transactions on Biomedical Engineering"},{"issue":"4","key":"9972_CR27","doi-asserted-by":"publisher","first-page":"795","DOI":"10.1097\/AUD.0000000000000537","volume":"39","author":"YH Lai","year":"2018","unstructured":"Lai, Y. H., Tsao, Y., Lu, X., Chen, F., Su, Y. T., Chen, K. C., Chen, Y. H., Chen, L. C., Li, L. P. H., & Lee, C. H. (2018). Deep learning-based noise reduction approach to improve speech intelligibility for cochlear implant recipients. Ear and Hearing, 39(4), 795\u2013809.","journal-title":"Ear and Hearing"},{"key":"9972_CR28","doi-asserted-by":"crossref","unstructured":"Li, B., Tsao, Y., & Sim, K. C. (2013). An investigation of spectral restoration algorithms for deep neural networks-based noise robust speech recognition. In INTERSPEECH (pp. 3002\u20133006).","DOI":"10.21437\/Interspeech.2013-278"},{"key":"9972_CR29","doi-asserted-by":"publisher","first-page":"48464","DOI":"10.1109\/ACCESS.2020.2979554","volume":"8","author":"R Liang","year":"2020","unstructured":"Liang, R., Kong, F., Xie, Y., Tang, G., & Cheng, J. (2020). Real-time speech enhancement algorithm based on attention LSTM. IEEE Access, 8, 48464\u201348476.","journal-title":"IEEE Access"},{"key":"9972_CR31","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1016\/j.specom.2019.06.002","volume":"111","author":"A Nicolson","year":"2019","unstructured":"Nicolson, A., & Paliwal, K. K. (2019). Deep learning for minimum mean-square error approaches to speech enhancement. Speech Communication, 111, 44\u201355.","journal-title":"Speech Communication"},{"key":"9972_CR30","unstructured":"NOIZEUS database. Retrieved January 2021, from https:\/\/ecs.utdallas.edu\/loizou\/speech\/noizeus\/."},{"issue":"8","key":"9972_CR32","doi-asserted-by":"publisher","first-page":"473","DOI":"10.3390\/brainsci10080473","volume":"10","author":"FF Operto","year":"2020","unstructured":"Operto, F. F., Pastorino, G. M. G., Stellato, M., Morcaldi, L., Vetri, L., Carotenuto, M., Viggiano, A., & Coppola, G. (2020). Facial emotion recognition in children and adolescents with specific learning disorder. Brain Sciences, 10(8), 473.","journal-title":"Brain Sciences"},{"issue":"7","key":"9972_CR33","doi-asserted-by":"publisher","first-page":"1179","DOI":"10.1109\/TASLP.2019.2913512","volume":"27","author":"A Pandey","year":"2019","unstructured":"Pandey, A., & Wang, D. (2019a). A new framework for CNN-based speech enhancement in the time domain. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 27(7), 1179\u20131188.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9972_CR34","doi-asserted-by":"crossref","unstructured":"Pandey, A., & Wang, D. (2019b). TCNN: Temporal convolutional neural network for real-time speech enhancement in the time domain. In Proceedings of IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 6875\u20136879).","DOI":"10.1109\/ICASSP.2019.8683634"},{"key":"9972_CR35","doi-asserted-by":"publisher","first-page":"1700","DOI":"10.1109\/LSP.2020.3025020","volume":"27","author":"H Phan","year":"2020","unstructured":"Phan, H., McLoughlin, I. V., Pham, L., Ch\u00e9n, O. Y., Koch, P., Vos, M. D., & Mertins, A. (2020). Improving GANs for speech enhancement. IEEE Signal Processing Letters, 27, 1700\u20131704.","journal-title":"IEEE Signal Processing Letters"},{"issue":"10","key":"9972_CR36","first-page":"120","volume":"8","author":"GM Rao","year":"2017","unstructured":"Rao, G. M., & Dinesh Gupta, K. N. P. V. R. (2017). Speech signal enhancement using firefly optimization algorithm. International Journal of Mechanical Engineering and Technology, 8(10), 120\u2013129.","journal-title":"International Journal of Mechanical Engineering and Technology"},{"issue":"1","key":"9972_CR37","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1109\/TASLP.2018.2876171","volume":"27","author":"K Tan","year":"2018","unstructured":"Tan, K., Chen, J., & Wang, D. (2018). Gated residual networks with dilated convolutions for monaural speech enhancement. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 27(1), 189\u2013198.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9972_CR38","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.specom.2015.10.003","volume":"76","author":"Y Tsao","year":"2016","unstructured":"Tsao, Y., & Lai, Y. H. (2016). Generalized maximum a posteriori spectral amplitude estimation for speech enhancement. Speech Communication, 76, 112\u2013126.","journal-title":"Speech Communication"},{"issue":"1","key":"9972_CR39","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/TASLP.2018.2870725","volume":"27","author":"Y Zhao","year":"2018","unstructured":"Zhao, Y., Wang, Z. Q., & Wang, D. (2018). Two-stage deep learning for noisy-reverberant speech enhancement. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 27(1), 53\u201362.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9972_CR40","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xu, B., Giri, R., & Zhang, T. (2018b). Perceptually guided speech enhancement using deep neural networks. In Proceedings of IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 5074\u20135078).","DOI":"10.1109\/ICASSP.2018.8462593"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09972-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-022-09972-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09972-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T11:13:48Z","timestamp":1690802028000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-022-09972-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,25]]},"references-count":40,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["9972"],"URL":"https:\/\/doi.org\/10.1007\/s10772-022-09972-x","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,5,25]]},"assertion":[{"value":"21 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 April 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 May 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}