{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T18:00:37Z","timestamp":1780509637429,"version":"3.54.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,1,28]],"date-time":"2021-01-28T00:00:00Z","timestamp":1611792000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,28]],"date-time":"2021-01-28T00:00:00Z","timestamp":1611792000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s10772-021-09809-z","type":"journal-article","created":{"date-parts":[[2021,2,2]],"date-time":"2021-02-02T17:01:49Z","timestamp":1612285309000},"page":"425-437","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Audio signal quality enhancement using multi-layered convolutional neural network based auto encoder\u2013decoder"],"prefix":"10.1007","volume":"24","author":[{"given":"Shivangi","family":"Raj","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2471-6375","authenticated-orcid":false,"given":"P.","family":"Prakasam","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shubham","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,1,28]]},"reference":[{"key":"9809_CR1","doi-asserted-by":"publisher","unstructured":"Albawi, S., Mohammed, T. A., & Al-Zawi, S. (2017). Understanding of a convolutional neural network. In Proceedings of the IEEE international conference on engineering and technology, https:\/\/doi.org\/10.1109\/ICEngTechnol.2017.8308186.","DOI":"10.1109\/ICEngTechnol.2017.8308186"},{"issue":"2","key":"9809_CR2","first-page":"629","volume":"22","author":"A Ali","year":"2019","unstructured":"Ali, A. (2019). Impulse noise reduction in audio signal through multi-stage technique. Eng. Sci. Technol. Int. J., 22(2), 629\u2013636.","journal-title":"Eng. Sci. Technol. Int. J."},{"key":"9809_CR3","doi-asserted-by":"crossref","unstructured":"Ali, M. A., & Shemi, P. M. (2015). An improved method of audio denoising based on wavelet transform. In Proceedings of the IEEE international conference on power, instrumentation, control and computing, 1\u20136.","DOI":"10.1109\/PICC.2015.7455802"},{"issue":"3","key":"9809_CR4","doi-asserted-by":"publisher","first-page":"11:01","DOI":"10.1145\/1970392.1970395","volume":"58","author":"EJ Candes","year":"2011","unstructured":"Candes, E. J., Li, X., Ma, Y., & Wright, J. (2011). Robust principal component analysis? Journal of the ACM, 58(3), 11:01-11:37.","journal-title":"Journal of the ACM"},{"key":"9809_CR5","doi-asserted-by":"crossref","unstructured":"Chandra, B., & Sharma, R. K. (2014). Adaptive noise schedule for denoising autoencoder. In Neural information processing. ICONIP 2014. Lecture Notes in Computer Science, 8834, 535\u2013542.","DOI":"10.1007\/978-3-319-12637-1_67"},{"key":"9809_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Z., Watanabe, S., Erdogan, H., & Hershey, J. R. (2015). Speech enhancement and recognition using multi-task learning of long short term memory recurrent neural networks. In Proceedings of the 16th Annual Conference of the International Speech Communication Association, 3274\u20133278.","DOI":"10.21437\/Interspeech.2015-659"},{"key":"9809_CR7","doi-asserted-by":"publisher","first-page":"1979","DOI":"10.1109\/TIFS.2017.2678458","volume":"12","author":"YHW Chin","year":"2017","unstructured":"Chin, Y. H., Wang, J. C., Huang, C. L., Wang, K. Y., & Wu, C. H. (2017). Speaker identification using discriminative features and sparse representation. IEEE Transactions on Information Forensics and Security, 12, 1979\u20131987.","journal-title":"IEEE Transactions on Information Forensics and Security"},{"issue":"4","key":"9809_CR8","doi-asserted-by":"publisher","first-page":"2159","DOI":"10.3233\/JIFS-162329","volume":"33","author":"MJ Davoudabadi","year":"2017","unstructured":"Davoudabadi, M. J., & Aminghafari, M. (2017). A fuzzy-wavelet denoising technique with applications to noise reduction in audio signals. Journal of Intelligent & Fuzzy Systems, 33(4), 2159\u20132169.","journal-title":"Journal of Intelligent & Fuzzy Systems"},{"issue":"9","key":"9809_CR9","doi-asserted-by":"publisher","first-page":"1570","DOI":"10.1109\/TASLP.2018.2821903","volume":"26","author":"SW Fu","year":"2018","unstructured":"Fu, S. W., Wang, T. W., Tsao, Y., Lu, X., & Kawai, H. (2018). End-to-end waveform utterance enhancement for direct evaluation metrics optimization by fully convolutional neural networks. IEEE\/ACM Transactions on Audio, Speech and Language Processing, 26(9), 1570\u20131584.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing"},{"key":"9809_CR10","volume-title":"2019","author":"M Michelashvili","year":"2019","unstructured":"Michelashvili, M., & Wolf, L. (2019). 2019. CoRR: Audio Denoising with Deep Network Priors."},{"issue":"7","key":"9809_CR11","doi-asserted-by":"publisher","first-page":"1179","DOI":"10.1109\/TASLP.2019.2913512","volume":"27","author":"A Pandey","year":"2019","unstructured":"Pandey, A., & Wang, D. (2019). A new framework for CNN-based speech enhancement in the time domain. IEEE\/ACM Transactions on Audio, Speech and Language Processing, 27(7), 1179\u20131188.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing"},{"key":"9809_CR12","doi-asserted-by":"crossref","unstructured":"Pascual, S., Bonafonte, A., & Serra, J. (2017). SEGAN: Speech enhancement generative adversarial network. In Proceedings of INTERSPEECH, 3642\u20133646.","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"9809_CR13","doi-asserted-by":"crossref","unstructured":"Pohjalainen, J., Ringeval, F., Zhang, Z., & Schuller, B. (2016). Spectral and cepstral audio noise reduction techniques in speech emotion recognition. In Proceedings of the 24th ACM International Conference on Multimedia, 670\u2013674","DOI":"10.1145\/2964284.2967306"},{"key":"9809_CR14","first-page":"749","volume":"2","author":"AW Rix","year":"2001","unstructured":"Rix, A. W., Beerends, J. G., Hollier, M. P., & Hekstra, A. P. (2001). Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs. Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing, 2, 749\u2013752.","journal-title":"Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing"},{"key":"9809_CR15","doi-asserted-by":"crossref","unstructured":"Shivakumar, P. G., & Georgiou, P. G. (2016). Perception optimized deep denoising autoencoders for speech enhancement. In Proc. INTERSPEECH, 3743\u20133747.","DOI":"10.21437\/Interspeech.2016-1284"},{"key":"9809_CR16","doi-asserted-by":"crossref","unstructured":"Sun, L., Du, J., Dai, L., & Lee, C. (2017). Multiple-target deep learning for LSTM-RNN based speech enhancement. In Proceedings of the hands-free speech communications and microphone arrays conference, 136\u2013140.","DOI":"10.1109\/HSCMA.2017.7895577"},{"issue":"7","key":"9809_CR17","doi-asserted-by":"publisher","first-page":"2125","DOI":"10.1109\/TASL.2011.2114881","volume":"19","author":"CH Taal","year":"2011","unstructured":"Taal, C. H., Hendriks, R. C., Heusdens, R., & Jensen, J. (2011). An algorithm for intelligibility prediction of time-frequency weighted noisy speech. IEEE Transactions on Audio, Speech, and Language Processing, 19(7), 2125\u20132136.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9809_CR18","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/TASLP.2019.2955276","volume":"28","author":"K Tan","year":"2019","unstructured":"Tan, K., & Wang, D. (2019). Learning complex spectral mapping with gated convolutional recurrent networks for monaural speech enhancement. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 28, 380\u2013390.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"10","key":"9809_CR19","first-page":"2449","volume":"13","author":"Thiruvengadam","year":"2017","unstructured":"Thiruvengadam. (2017). Speech\/music classification using MFCC and KNN. International Journal of Computational Intelligence Research, 13(10), 2449\u20132452.","journal-title":"International Journal of Computational Intelligence Research"},{"issue":"1","key":"9809_CR20","first-page":"19","volume":"1","author":"V Tiwari","year":"2010","unstructured":"Tiwari, V. (2010). MFCC and its applications in speaker recognition. International Journal on Emerging Technologies, 1(1), 19\u201322.","journal-title":"International Journal on Emerging Technologies"},{"key":"9809_CR22","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., & Manzagol, P. A. (2008). Extracting and composing robust features with denoising autoencoders. In Proceedings of the International Conference on Machine Learning, 1096\u20131103.","DOI":"10.1145\/1390156.1390294"},{"key":"9809_CR21","first-page":"3371","volume":"11","author":"P Vincent","year":"2010","unstructured":"Vincent, P., Larochelle, H., Lajoie, I., Bengio, Y., & Manzagol, P. A. (2010). Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion. Journal of Machine Learning Research, 11, 3371\u20133408.","journal-title":"Journal of Machine Learning Research"},{"issue":"10","key":"9809_CR23","doi-asserted-by":"publisher","first-page":"1702","DOI":"10.1109\/TASLP.2018.2842159","volume":"26","author":"D Wang","year":"2018","unstructured":"Wang, D., & Chen, J. (2018). Supervised speech separation based on deep learning: An overview. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 26(10), 1702\u20131726.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"11","key":"9809_CR24","doi-asserted-by":"publisher","first-page":"2122","DOI":"10.1109\/TASLP.2016.2598306","volume":"24","author":"JC Wang","year":"2016","unstructured":"Wang, J. C., Lee, Y. S., Lin, C. H., Wang, S. F., Shih, C. H., & Wu, C. H. (2016). Compressive sensing-based speech enhancement. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 24(11), 2122\u20132131.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9809_CR25","doi-asserted-by":"crossref","unstructured":"Welk, M., Bergmeister, A., & Weickert, J. (2015). Denoising of audio data by nonlinear diffusion. In Scale space and PDE methods in computer vision. Lecture notes in computer science, 3459, 598\u2013609.","DOI":"10.1007\/11408031_51"},{"key":"9809_CR26","doi-asserted-by":"crossref","unstructured":"Wilson, K. W., Raj, B., Smaragdis, P., & Divakaran, A. (2009). Speech denoising using nonnegative matrix factorization with priors. In Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing, 4029\u20134032.","DOI":"10.1109\/ICASSP.2008.4518538"},{"issue":"1","key":"9809_CR27","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1109\/LSP.2013.2291240","volume":"21","author":"Y Xu","year":"2014","unstructured":"Xu, Y., Du, J., Dai, L. R., & Lee, C. H. (2014). An experimental study on speech enhancement based on deep neural networks. IEEE Signal Processing Letters, 21(1), 65\u201368.","journal-title":"IEEE Signal Processing Letters"},{"key":"9809_CR28","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1016\/j.cogsys.2018.07.004","volume":"53","author":"O Yildirim","year":"2018","unstructured":"Yildirim, O., Tan, R. S., & Acharya, U. R. (2018). An efficient compression of ECG signals using deep convolutional autoencoders. Cognitive Systems Research, 53, 198\u2013211.","journal-title":"Cognitive Systems Research"},{"key":"9809_CR29","doi-asserted-by":"crossref","unstructured":"Yu, G., Bacry, E., & Mallat, S. (2007). Audio signal denoising with complex wavelets and adaptive block attenuation. In Proceedings of the IEEE international conference on acoustics, speech and signal processing, 863\u2013869.","DOI":"10.1109\/ICASSP.2007.366818"},{"issue":"4","key":"9809_CR30","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1109\/TASLP.2018.2887337","volume":"27","author":"Z Zhao","year":"2019","unstructured":"Zhao, Z., Liu, H., & Fingscheidt, T. (2019). Convolutional neural networks to enhance coded speech. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 27(4), 663\u2013678.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09809-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-021-09809-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09809-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,14]],"date-time":"2022-12-14T10:41:23Z","timestamp":1671014483000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-021-09809-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,28]]},"references-count":30,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["9809"],"URL":"https:\/\/doi.org\/10.1007\/s10772-021-09809-z","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,28]]},"assertion":[{"value":"24 July 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 January 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that there is no conflict of interest regarding the publication of this paper and that the work presented in this article is not supported by any funding agency.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}