{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:18:49Z","timestamp":1772119129967,"version":"3.50.1"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,8,5]],"date-time":"2022-08-05T00:00:00Z","timestamp":1659657600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,5]],"date-time":"2022-08-05T00:00:00Z","timestamp":1659657600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s00034-022-02123-2","type":"journal-article","created":{"date-parts":[[2022,8,5]],"date-time":"2022-08-05T02:08:51Z","timestamp":1659665331000},"page":"322-343","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Neural Comb Filtering Using Sliding Window Attention Network for Speech Enhancement"],"prefix":"10.1007","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6341-9480","authenticated-orcid":false,"given":"Venkatesh","family":"Parvathala","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4782-232X","authenticated-orcid":false,"given":"Sivaganesh","family":"Andhavarapu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5945-6860","authenticated-orcid":false,"given":"Giridhar","family":"Pamisetty","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6355-5287","authenticated-orcid":false,"given":"K. Sri Rama","family":"Murty","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,5]]},"reference":[{"key":"2123_CR1","unstructured":"I. Beltagy, M.E. Peters, A. Cohan, Longformer: the long-document transformer. arXiv preprint arXiv:2004.05150 (2020)"},{"key":"2123_CR2","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S Boll","year":"1979","unstructured":"S. Boll, Suppression of acoustic noise in speech using spectral subtraction. IEEE Trans. Speech Signal 27, 113\u2013120 (1979)","journal-title":"IEEE Trans. Speech Signal"},{"issue":"2","key":"2123_CR3","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1109\/89.279283","volume":"2","author":"O Cappe","year":"1994","unstructured":"O. Cappe, Elimination of the musical noise phenomenon with the Ephraim and Malah noise suppressor. IEEE Trans. Speech Audio Process. 2(2), 345\u2013349 (1994). https:\/\/doi.org\/10.1109\/89.279283","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"2123_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/S0165-1684(01)00128-1","author":"I Cohen","year":"2001","unstructured":"I. Cohen, B. Berdugo, Speech enhancement for non-stationary noise environments. Signal Process. (2001). https:\/\/doi.org\/10.1016\/S0165-1684(01)00128-1","journal-title":"Signal Process."},{"issue":"1","key":"2123_CR5","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1109\/TASSP.1980.1163353","volume":"28","author":"R Crochiere","year":"1980","unstructured":"R. Crochiere, A weighted overlap-add method of short-time Fourier analysis\/synthesis. IEEE Trans. Acoust. Speech Signal Process. 28(1), 99\u2013102 (1980)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"2123_CR6","unstructured":"A. Defossez, G. Synnaeve, Y. Adi, DEMUCS implementation codes and pre-trained models. https:\/\/github.com\/facebookresearch\/denoiser"},{"key":"2123_CR7","doi-asserted-by":"publisher","unstructured":"A. D\u00e9fossez, G. Synnaeve, Y. Adi, Real time speech enhancement in the waveform domain, in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2409","DOI":"10.21437\/Interspeech.2020-2409"},{"issue":"11","key":"2123_CR8","doi-asserted-by":"publisher","first-page":"1803","DOI":"10.1109\/TASLP.2019.2933698","volume":"27","author":"S Elshamy","year":"2019","unstructured":"S. Elshamy, T. Fingscheidt, DNN-based cepstral excitation manipulation for speech enhancement. IEEE\/ACM Trans. Audio Speech Lang Process 27(11), 1803\u20131814 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang Process"},{"key":"2123_CR9","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164453","author":"Y Ephraim","year":"1984","unstructured":"Y. Ephraim, D. Malah, Speech enhancement using a minimum mean-square error short-time spectral amplitude estimator. IEEE Trans. Acoust. Speech Signal Process. (1984). https:\/\/doi.org\/10.1109\/TASSP.1984.1164453","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"2123_CR10","doi-asserted-by":"crossref","unstructured":"H. Erdogan, J.R. Hershey, S. Watanabe, J. Le Roux, Phase-sensitive and recognition-boosted speech separation using deep recurrent neural networks, in 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2015), pp. 708\u2013712","DOI":"10.1109\/ICASSP.2015.7178061"},{"key":"2123_CR11","unstructured":"S.W. Fu, C.F. Liao, Y. Tsao, S.D. Lin, Metricgan: generative adversarial networks based black-box metric scores optimization for speech enhancement, in International Conference on Machine Learning (PMLR, 2019), pp. 2031\u20132041"},{"key":"2123_CR12","doi-asserted-by":"publisher","unstructured":"S.W. Fu, Y. Tsao, X. Lu, SNR-aware convolutional neural network modeling for speech enhancement, in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH (2016). https:\/\/doi.org\/10.21437\/Interspeech.2016-211","DOI":"10.21437\/Interspeech.2016-211"},{"key":"2123_CR13","unstructured":"S.W. Fu, C. Yu, T.A. Hsieh, P. Plantinga, M. Ravanelli, X. Lu, Y. Tsao, Metricgan+ pre-trained model and inference function. https:\/\/huggingface.co\/speechbrain\/metricgan-plus-voicebank"},{"key":"2123_CR14","unstructured":"S.W. Fu, C. Yu, T.A. Hsieh, P. Plantinga, M. Ravanelli, X. Lu, Y. Tsao, Metricgan+: an improved version of Metricgan for speech enhancement. arXiv preprint arXiv:2104.03538 (2021)"},{"key":"2123_CR15","doi-asserted-by":"publisher","first-page":"7025","DOI":"10.3390\/s21217025","volume":"21","author":"J Gnanamanickam","year":"2021","unstructured":"J. Gnanamanickam, Y. Natarajan, S. Ramasamy, A hybrid speech enhancement algorithm for voice assistance application. Sensors 21, 7025 (2021). https:\/\/doi.org\/10.3390\/s21217025","journal-title":"Sensors"},{"key":"2123_CR16","doi-asserted-by":"crossref","unstructured":"J.H.L. Hansen, B.L. Pellom, An effective quality evaluation protocol for speech enhancement algorithms, in Proceedings of the International Conference on Spoken Language Processing (ICSLP), Sydney, Australia (1998)","DOI":"10.21437\/ICSLP.1998-350"},{"key":"2123_CR17","doi-asserted-by":"crossref","unstructured":"R.C. Hendriks, R. Heusdens, J. Jensen, MMSE based noise PSD tracking with low complexity, in 2010 IEEE International Conference on Acoustics, Speech and Signal Processing (IEEE, 2010), pp. 4266\u20134269","DOI":"10.1109\/ICASSP.2010.5495680"},{"key":"2123_CR18","unstructured":"G. Hu, D. Wang, Speech segregation based on pitch tracking and amplitude modulation, in Proceedings of the 2001 IEEE Workshop on the Applications of Signal Processing to Audio and Acoustics (Cat. No. 01TH8575) (IEEE, 2001), pp. 79\u201382"},{"key":"2123_CR19","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.911054","author":"Y Hu","year":"2008","unstructured":"Y. Hu, P.C. Loizou, Evaluation of objective quality measures for speech enhancement. IEEE Trans. Audio Speech Lang. Process. (2008). https:\/\/doi.org\/10.1109\/TASL.2007.911054","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2123_CR20","doi-asserted-by":"crossref","unstructured":"S. Jafarlou, S. Khorram, V. Kothapally, J.H.L. Hansen, Analyzing large receptive field convolutional networks for distant speech recognition, in 2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) (2019), pp. 252\u2013259","DOI":"10.1109\/ASRU46091.2019.9003805"},{"key":"2123_CR21","doi-asserted-by":"crossref","unstructured":"K. Kasi, S.A. Zahorian, Yet another algorithm for pitch tracking, in IEEE International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a01 (IEEE, 2002), pp. I\u2013361","DOI":"10.1109\/ICASSP.2002.1005751"},{"key":"2123_CR22","doi-asserted-by":"publisher","unstructured":"J. Kim, M. El-Khamy, J. Lee, T-GSA: transformer with Gaussian-weighted self-attention for speech enhancement, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9053591","DOI":"10.1109\/ICASSP40776.2020.9053591"},{"key":"2123_CR23","unstructured":"J. Kim, M. El-Kharmy, End-to-end multi-task denoising for joint SDR and PESQ optimization. arXiv:1901.09146, J Lee - arXiv preprint (2019)"},{"key":"2123_CR24","unstructured":"D.P. Kingma, J.L. Ba, Adam: a method for stochastic optimization, in 3rd International Conference on Learning Representations, ICLR\u2014Conference Track Proceedings (2015)"},{"issue":"2","key":"2123_CR25","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1121\/1.398894","volume":"87","author":"DH Klatt","year":"1990","unstructured":"D.H. Klatt, L.C. Klatt, Analysis, synthesis, and perception of voice quality variations among female and male talkers. J. Acoust. Soc. Am. 87(2), 820\u2013857 (1990)","journal-title":"J. Acoust. Soc. Am."},{"key":"2123_CR26","doi-asserted-by":"publisher","DOI":"10.4103\/1463-1741.70506","author":"M Klatte","year":"2010","unstructured":"M. Klatte, T. Lachmann, M. Meis, Effects of noise and reverberation on speech perception and listening comprehension of children and adults in a classroom-like setting. Noise Health (2010). https:\/\/doi.org\/10.4103\/1463-1741.70506","journal-title":"Noise Health"},{"key":"2123_CR27","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1109\/9780470544037.ch14","volume-title":"Gradient Flow in Recurrent Nets: The Difficulty of Learning LongTerm Dependencies","author":"JF Kolen","year":"2001","unstructured":"J.F. Kolen, S.C. Kremer, Gradient Flow in Recurrent Nets: The Difficulty of Learning LongTerm Dependencies (Wiley-IEEE Press, New York, 2001), pp.237\u2013243. https:\/\/doi.org\/10.1109\/9780470544037.ch14"},{"key":"2123_CR28","doi-asserted-by":"publisher","DOI":"10.1201\/9781420015836","volume-title":"Speech Enhancement: Theory and Practice","author":"PC Loizou","year":"2007","unstructured":"P.C. Loizou, Speech Enhancement: Theory and Practice (CRC Press, Boca Raton, 2007)"},{"key":"2123_CR29","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2915167","author":"Y Luo","year":"2019","unstructured":"Y. Luo, N. Mesgarani, Conv-TasNet: surpassing ideal time-frequency magnitude masking for speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. (2019). https:\/\/doi.org\/10.1109\/TASLP.2019.2915167","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2123_CR30","doi-asserted-by":"crossref","unstructured":"D. Malah, R. Cox, A generalized comb filtering technique for speech enhancement, in ICASSP\u201982. IEEE International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a07 (IEEE, 1982), pp. 160\u2013163","DOI":"10.1109\/ICASSP.1982.1171716"},{"issue":"S1","key":"2123_CR31","doi-asserted-by":"publisher","first-page":"S99","DOI":"10.1121\/1.2017546","volume":"65","author":"MK Marguiles","year":"1979","unstructured":"M.K. Marguiles, Male\u2013female differences in speaker intelligibility; normal and hearing-impaired listeners. J. Acoust. Soc. Am. 65(S1), S99\u2013S99 (1979)","journal-title":"J. Acoust. Soc. Am."},{"key":"2123_CR32","unstructured":"R. Martin, Spectral subtraction based on minimum statistics, in European Signal Processing Conference (EUSIPCO)\u2013Proceedings, pp. 1182\u20131185 (1994)"},{"issue":"5","key":"2123_CR33","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1109\/89.928915","volume":"9","author":"R Martin","year":"2001","unstructured":"R. Martin, Noise power spectral density estimation based on optimal smoothing and minimum statistics. IEEE Trans. Speech Audio Process. 9(5), 504\u2013512 (2001)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"8","key":"2123_CR34","doi-asserted-by":"publisher","first-page":"1602","DOI":"10.1109\/TASL.2008.2004526","volume":"16","author":"KSR Murty","year":"2008","unstructured":"K.S.R. Murty, B. Yegnanarayana, Epoch extraction from speech signals. IEEE Trans. Audio Speech Lang. Process. 16(8), 1602\u20131613 (2008)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"5","key":"2123_CR35","doi-asserted-by":"publisher","first-page":"1124","DOI":"10.1109\/TASSP.1986.1164952","volume":"34","author":"A Nehorai","year":"1986","unstructured":"A. Nehorai, B. Porat, Adaptive comb filtering for harmonic signal enhancement. IEEE Trans. Acoust. Speech Signal Process. 34(5), 1124\u20131138 (1986)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"2123_CR36","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1016\/j.specom.2019.06.002","volume":"111","author":"A Nicolson","year":"2019","unstructured":"A. Nicolson, K.K. Paliwal, Deep learning for minimum mean-square error approaches to speech enhancement. Speech Commun. 111, 44\u201355 (2019)","journal-title":"Speech Commun."},{"key":"2123_CR37","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1016\/j.specom.2020.10.004","volume":"125","author":"A Nicolson","year":"2020","unstructured":"A. Nicolson, K.K. Paliwal, Masked multi-head self-attention for causal speech enhancement. Speech Commun. 125, 80\u201396 (2020)","journal-title":"Speech Commun."},{"key":"2123_CR38","doi-asserted-by":"publisher","DOI":"10.1121\/1.1910339","author":"AM Noll","year":"1967","unstructured":"A.M. Noll, Cepstrum pitch determination. J. Acoust. Soc. Am. (1967). https:\/\/doi.org\/10.1121\/1.1910339","journal-title":"J. Acoust. Soc. Am."},{"key":"2123_CR39","doi-asserted-by":"publisher","unstructured":"S.R. Park, J.W. Lee, A fully convolutional neural network for speech enhancement, in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH (2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-1465","DOI":"10.21437\/Interspeech.2017-1465"},{"key":"2123_CR40","doi-asserted-by":"publisher","unstructured":"S. Pascual, A. Bonafonte, J. Serra, SEGAN: speech enhancement generative adversarial network, in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH (2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-1428","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"2123_CR41","volume-title":"Kernel Adaptive Filtering: A Comprehensive Introduction","author":"JC Pr\u00edncipe","year":"2011","unstructured":"J.C. Pr\u00edncipe, W. Liu, S. Haykin, Kernel Adaptive Filtering: A Comprehensive Introduction (Wiley, New York, 2011)"},{"key":"2123_CR42","volume-title":"Fundamentals of Speech Recognition","author":"L Rabiner","year":"1993","unstructured":"L. Rabiner, B.H. Juang, Fundamentals of Speech Recognition (Prentice-Hall, Inc., Hoboken, 1993)"},{"key":"2123_CR43","doi-asserted-by":"publisher","unstructured":"P.S. Rani, S. Andhavarapu, S.R. Murty Kodukula, Significance of phase in DNN based speech enhancement algorithms, in 26th National Conference on Communications, NCC (2020). https:\/\/doi.org\/10.1109\/NCC48643.2020.9056089","DOI":"10.1109\/NCC48643.2020.9056089"},{"key":"2123_CR44","volume-title":"Discrete Cosine Transform: Algorithms, Advantages, Applications","author":"KR Rao","year":"2014","unstructured":"K.R. Rao, P. Yip, Discrete Cosine Transform: Algorithms, Advantages, Applications (Academic Press, New York, 2014)"},{"key":"2123_CR45","unstructured":"I.T. Recommendation, Perceptual evaluation of speech quality (PESQ): An objective method for end-to-end speech quality assessment of narrow-band telephone networks and speech codecs. Rec. ITU-T P. 862 (2001)"},{"key":"2123_CR46","doi-asserted-by":"publisher","unstructured":"M. Romaniuk, P. Masztalski, K. Piaskowski, M. Matuszewski, Efficient low-latency speech enhancement with mobile audio streaming networks, in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2443","DOI":"10.21437\/Interspeech.2020-2443"},{"key":"2123_CR47","doi-asserted-by":"publisher","unstructured":"P. Scalart, J.V. Filho, Speech enhancement based on a priori signal to noise estimation, in ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing\u2014Proceedings, vol.\u00a02 (1996). https:\/\/doi.org\/10.1109\/icassp.1996.543199","DOI":"10.1109\/icassp.1996.543199"},{"key":"2123_CR48","doi-asserted-by":"publisher","unstructured":"M.H. Soni, N. Shah, H.A. Patil, Time-frequency masking-based speech enhancement using generative adversarial network, in ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing\u2014Proceedings, vol. 2018 (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8462068","DOI":"10.1109\/ICASSP.2018.8462068"},{"issue":"11","key":"2123_CR49","doi-asserted-by":"publisher","first-page":"1486","DOI":"10.1016\/j.specom.2006.09.003","volume":"48","author":"S Srinivasan","year":"2006","unstructured":"S. Srinivasan, N. Roman, D. Wang, Binary and ratio time-frequency masks for robust speech recognition. Speech Commun. 48(11), 1486\u20131501 (2006)","journal-title":"Speech Commun."},{"key":"2123_CR50","doi-asserted-by":"publisher","unstructured":"C.H. Taal, R.C. Hendriks, R. Heusdens, J. Jensen, A short-time objective intelligibility measure for time-frequency weighted noisy speech, in ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing\u2014Proceedings (2010). https:\/\/doi.org\/10.1109\/ICASSP.2010.5495701","DOI":"10.1109\/ICASSP.2010.5495701"},{"key":"2123_CR51","doi-asserted-by":"publisher","DOI":"10.1145\/3530811","author":"Y Tay","year":"2022","unstructured":"Y. Tay, M. Dehghani, D. Bahri, D. Metzler, Efficient transformers: a survey. ACM Comput. Surv. (2022). https:\/\/doi.org\/10.1145\/3530811","journal-title":"ACM Comput. Surv."},{"key":"2123_CR52","doi-asserted-by":"publisher","unstructured":"D. Terpstra, H. Jagode, H. You, J. Dongarra, Collecting performance data with PAPI-C, in Proceedings of the 3rd International Workshop on Parallel Tools for High Performance Computing 2009 (2010). https:\/\/doi.org\/10.1007\/978-3-642-11261-4_11","DOI":"10.1007\/978-3-642-11261-4_11"},{"key":"2123_CR53","doi-asserted-by":"publisher","DOI":"10.1121\/1.4806631","author":"J Thiemann","year":"2013","unstructured":"J. Thiemann, N. Ito, E. Vincent, The diverse environments multi-channel acoustic noise database: a database of multichannel environmental noise recordings. J. Acoust. Soc. Am. (2013). https:\/\/doi.org\/10.1121\/1.4806631","journal-title":"J. Acoust. Soc. Am."},{"key":"2123_CR54","doi-asserted-by":"crossref","unstructured":"C. Valentini-Botinhao, X. Wang, S. Takaki, J. Yamagishi, Investigating RNN-based speech enhancement methods for noise-robust text-to-speech, in SSW (2016), pp. 146\u2013152","DOI":"10.21437\/SSW.2016-24"},{"key":"2123_CR55","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A.N. Gomez, L. Kaiser, I. Polosukhin, Attention is all you need, in Advances in Neural Information Processing Systems (2017)"},{"key":"2123_CR56","doi-asserted-by":"publisher","unstructured":"C. Veaux, J. Yamagishi, S. King, The voice bank corpus: design, collection and data analysis of a large regional accent speech database, in 2013 International Conference Oriental COCOSDA Held Jointly with 2013 Conference on Asian Spoken Language Research and Evaluation, O-COCOSDA\/CASLRE 2013 (2013). https:\/\/doi.org\/10.1109\/ICSDA.2013.6709856","DOI":"10.1109\/ICSDA.2013.6709856"},{"issue":"10","key":"2123_CR57","doi-asserted-by":"publisher","first-page":"1702","DOI":"10.1109\/TASLP.2018.2842159","volume":"26","author":"D Wang","year":"2018","unstructured":"D. Wang, J. Chen, Supervised speech separation based on deep learning: an overview. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(10), 1702\u20131726 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2123_CR58","unstructured":"K. Wang, B. He, W.P. Zhu, TSTNN implementation codes. https:\/\/github.com\/key2miao\/TSTNN"},{"key":"2123_CR59","doi-asserted-by":"publisher","unstructured":"K. Wang, B. He, W.P. Zhu, TSTNN: two-stage transformer based neural network for speech enhancement in the time domain, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2021). https:\/\/doi.org\/10.1109\/icassp39728.2021.9413740","DOI":"10.1109\/icassp39728.2021.9413740"},{"issue":"7","key":"2123_CR60","doi-asserted-by":"publisher","first-page":"1185","DOI":"10.1109\/TASLP.2018.2817798","volume":"26","author":"Q Wang","year":"2018","unstructured":"Q. Wang, J. Du, L.R. Dai, C.H. Lee, A multiobjective learning and ensembling approach to high-performance speech enhancement with compact neural network architectures. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(7), 1185\u20131197 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"12","key":"2123_CR61","doi-asserted-by":"publisher","first-page":"1849","DOI":"10.1109\/TASLP.2014.2352935","volume":"22","author":"Y Wang","year":"2014","unstructured":"Y. Wang, A. Narayanan, D. Wang, On training targets for supervised speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(12), 1849\u20131858 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2123_CR62","doi-asserted-by":"publisher","unstructured":"F. Weninger, J.R. Hershey, J. Le Roux, B. Schuller, Discriminatively trained recurrent neural networks for single-channel speech separation, in IEEE Global Conference on Signal and Information Processing, GlobalSIP (2014). https:\/\/doi.org\/10.1109\/GlobalSIP.2014.7032183","DOI":"10.1109\/GlobalSIP.2014.7032183"},{"issue":"3","key":"2123_CR63","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1109\/TASLP.2015.2512042","volume":"24","author":"DS Williamson","year":"2015","unstructured":"D.S. Williamson, Y. Wang, D. Wang, Complex ratio masking for monaural speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(3), 483\u2013492 (2015)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2123_CR64","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2013.2291240","author":"Y Xu","year":"2014","unstructured":"Y. Xu, J. Du, L.R. Dai, C.H. Lee, An experimental study on speech enhancement based on deep neural networks. IEEE Signal Process. Lett. (2014). https:\/\/doi.org\/10.1109\/LSP.2013.2291240","journal-title":"IEEE Signal Process. Lett."},{"key":"2123_CR65","doi-asserted-by":"publisher","DOI":"10.1121\/1.1852873","author":"LP Yang","year":"2005","unstructured":"L.P. Yang, Q.J. Fu, Spectral subtraction-based speech enhancement for cochlear implant patients in background noise. J. Acoust. Soc. Am. (2005). https:\/\/doi.org\/10.1121\/1.1852873","journal-title":"J. Acoust. Soc. Am."},{"issue":"1","key":"2123_CR66","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/S0167-6393(98)00070-3","volume":"28","author":"B Yegnanarayana","year":"1999","unstructured":"B. Yegnanarayana, C. Avendano, H. Hermansky, P.S. Murthy, Speech enhancement using linear prediction residual. Speech Commun. 28(1), 25\u201342 (1999)","journal-title":"Speech Commun."},{"key":"2123_CR67","doi-asserted-by":"publisher","DOI":"10.1121\/1.2916590","author":"SA Zahorian","year":"2008","unstructured":"S.A. Zahorian, H. Hu, A spectral\/temporal method for robust fundamental frequency tracking. J. Acoust. Soc. Am. (2008). https:\/\/doi.org\/10.1121\/1.2916590","journal-title":"J. Acoust. Soc. Am."},{"key":"2123_CR68","doi-asserted-by":"publisher","first-page":"2561","DOI":"10.1109\/TASLP.2021.3092585","volume":"29","author":"L Zhang","year":"2021","unstructured":"L. Zhang, M. Wang, Q. Zhang, X. Wang, M. Liu, PhaseDCN: a phase-enhanced dual-path dilated convolutional network for single-channel speech enhancement. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 2561\u20132574 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2123_CR69","doi-asserted-by":"publisher","first-page":"1404","DOI":"10.1109\/TASLP.2020.2987441","volume":"28","author":"Q Zhang","year":"2020","unstructured":"Q. Zhang, A. Nicolson, M. Wang, K.K. Paliwal, C. Wang, DeepMMSE: a deep learning approach to MMSE-based noise power spectral density estimation. IEEE\/ACM Trans. Audio Speech Lang. Process. 28, 1404\u20131415 (2020)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-02123-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-022-02123-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-02123-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T16:02:55Z","timestamp":1727712175000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-022-02123-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,5]]},"references-count":69,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["2123"],"URL":"https:\/\/doi.org\/10.1007\/s00034-022-02123-2","relation":{"has-preprint":[{"id-type":"doi","id":"10.36227\/techrxiv.15051972.v1","asserted-by":"object"},{"id-type":"doi","id":"10.36227\/techrxiv.15051972","asserted-by":"object"}]},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,5]]},"assertion":[{"value":"3 October 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 July 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 July 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 August 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}