{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T15:01:09Z","timestamp":1778684469241,"version":"3.51.4"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2017,1,6]],"date-time":"2017-01-06T00:00:00Z","timestamp":1483660800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003176","name":"Ministerio de Educaci\u00f3n, Cultura y Deporte","doi-asserted-by":"publisher","award":["TEC2013-46690-P"],"award-info":[{"award-number":["TEC2013-46690-P"]}],"id":[{"id":"10.13039\/501100003176","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1007\/s00034-016-0480-7","type":"journal-article","created":{"date-parts":[[2017,1,6]],"date-time":"2017-01-06T04:58:38Z","timestamp":1483678718000},"page":"3731-3760","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Spectral Reconstruction and Noise Model Estimation Based on a Masking Model for Noise Robust Speech Recognition"],"prefix":"10.1007","volume":"36","author":[{"given":"Jose A.","family":"Gonzalez","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9995-3068","authenticated-orcid":false,"given":"Angel M.","family":"G\u00f3mez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8214-6676","authenticated-orcid":false,"given":"Antonio M.","family":"Peinado","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4112-3109","authenticated-orcid":false,"given":"Ning","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1684-5660","authenticated-orcid":false,"given":"Jon","family":"Barker","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,1,6]]},"reference":[{"key":"480_CR1","doi-asserted-by":"crossref","unstructured":"A. Acero, L. Deng, T. Kristjansson, J. Zhang, HMM adaptation using vector Taylor series for noisy speech recognition, in Proceedings of ICSLP, pp. 229\u2013232 (2000)","DOI":"10.21437\/ICSLP.2000-672"},{"issue":"3","key":"480_CR2","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1109\/MSP.2009.932166","volume":"26","author":"JM Baker","year":"2009","unstructured":"J.M. Baker, L. Deng, J. Glass, S. Khudanpur, C.H. Lee, N. Morgan, D. O\u2019Shaughnessy, Research developments and directions in speech recognition and understanding, part 1. IEEE Signal Process. Mag. 26(3), 75\u201380 (2009)","journal-title":"IEEE Signal Process. Mag."},{"issue":"4","key":"480_CR3","doi-asserted-by":"crossref","first-page":"78","DOI":"10.1109\/MSP.2009.932707","volume":"26","author":"JM Baker","year":"2009","unstructured":"J.M. Baker, L. Deng, S. Khudanpur, C.H. Lee, J. Glass, N. Morgan, D. O\u2019Shaughnessy, Updated MINDS report on speech recognition and understanding, part 2. IEEE Signal Process. Mag. 26(4), 78\u201385 (2009)","journal-title":"IEEE Signal Process. Mag."},{"issue":"1","key":"480_CR4","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/j.specom.2004.05.002","volume":"45","author":"J Barker","year":"2005","unstructured":"J. Barker, M. Cooke, D.P.W. Ellis, Decoding speech in the presence of other sources. Speech Commun. 45(1), 5\u201325 (2005)","journal-title":"Speech Commun."},{"key":"480_CR5","doi-asserted-by":"crossref","unstructured":"J. Barker, L. Josifovski, M.P. Cooke, P.D. Green, Soft decisions in missing data techniques for robust automatic speech recognition, in Proceedings of ICSLP (2000)","DOI":"10.21437\/ICSLP.2000-92"},{"issue":"3","key":"480_CR6","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1016\/j.csl.2006.08.001","volume":"21","author":"C Cerisara","year":"2007","unstructured":"C. Cerisara, S. Demange, J.P. Haton, On noise masking for automatic missing data speech recognition: a survey and discussion. Comput. Speech Lang. 21(3), 443\u2013457 (2007)","journal-title":"Comput. Speech Lang."},{"issue":"3","key":"480_CR7","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1016\/S0167-6393(00)00034-0","volume":"34","author":"M Cooke","year":"2001","unstructured":"M. Cooke, P.D. Green, L. Josifovski, A. Vizinho, Robust automatic speech recognition with missing and unreliable acoustic data. Speech Commun. 34(3), 267\u2013285 (2001)","journal-title":"Speech Commun."},{"key":"480_CR8","doi-asserted-by":"crossref","unstructured":"M. Cooke, A. Morris, P.D. Green, Missing data techniques for robust speech recognition, in Proceedings of ICASSP, pp. 863\u2013866 (1997)","DOI":"10.1109\/ICASSP.1997.596072"},{"key":"480_CR9","unstructured":"M. Delcroix, K. Kinoshita, T. Nakatani, S. Araki, A. Ogawa, T. Hori, S. Watanabe, M. Fujimoto, T. Yoshioka, T. Oba, et\u00a0al, Speech recognition in the presence of highly non-stationary noise based on spatial, spectral and temporal speech\/noise modeling combined with dynamic variance adaptation, in Proceedings of the 1st International Workshop on Machine Listening in Multisource Environments (CHiME), pp. 12\u201317 (2011)"},{"issue":"1","key":"480_CR10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"A.P. Dempster, N.M. Laird, D.B. Rubin, Maximum likelihood from incomplete data via the EM algorithm. J. R. Stat. Soc. 39(1), 1\u201338 (1977)","journal-title":"J. R. Stat. Soc."},{"issue":"2","key":"480_CR11","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1109\/TSA.2003.820201","volume":"12","author":"L Deng","year":"2004","unstructured":"L. Deng, J. Droppo, A. Acero, Enhancement of log mel power spectra of speech using a phase-sensitive model of the acoustic environment and sequential estimation of the corrupting noise. IEEE Trans. Speech Audio Process. 12(2), 133\u2013143 (2004)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"480_CR12","unstructured":"P.J. Dhrymes, Moments of truncated (normal) distributions (2005)"},{"key":"480_CR13","unstructured":"ETSI: ETSI ES 201 108\u2014Distributed speech recognition; front-end feature extraction algorithm; compression algorithms (2003)"},{"key":"480_CR14","unstructured":"ETSI: ETSI ES 202 050\u2014Distributed speech recognition; advanced front-end feature extraction algorithm; compression algorithms (2007)"},{"key":"480_CR15","doi-asserted-by":"crossref","unstructured":"F. Faubel, J. McDonough, D. Klakow, A phase-averaged model for the relationship between noisy speech, clean speech and noise in the log-mel domain, in Proceedings of the Interspeech, pp. 553\u2013556 (2008)","DOI":"10.21437\/Interspeech.2008-164"},{"key":"480_CR16","doi-asserted-by":"crossref","unstructured":"F. Faubel, J. McDonough, D. Klakow, Bounded conditional mean imputation with Gaussian mixture models: a reconstruction approach to partly occluded features, in Proceedings of the ICASSP, pp. 3869\u20133872 (2009)","DOI":"10.1109\/ICASSP.2009.4960472"},{"key":"480_CR17","unstructured":"F. Faubel, H. Raja, J. McDonough, D. Klakow, Particle filter based soft-mask estimation for missing feature reconstruction, in Proceedings of the IWAENC (2008)"},{"key":"480_CR18","doi-asserted-by":"crossref","unstructured":"J.A. Gonz\u00e1lez, A.M. Peinado, A.M. G\u00f3mez, MMSE feature reconstruction based on an occlusion model for robust ASR, in Advances in Speech and Language Technologies for Iberian Languages\u2014IberSPEECH 2012, Communications in Computer and Information Science, (Springer, 2012), pp. 217\u2013226","DOI":"10.1007\/978-3-642-35292-8_23"},{"key":"480_CR19","doi-asserted-by":"crossref","unstructured":"J.A. Gonz\u00e1lez, A.M. Peinado, A.M. G\u00f3mez, N. Ma, Log-spectral feature reconstruction based on an occlusion model for noise robust speech recognition, in Proceedings of the Interspeech, pp. 2630\u20132633 (2012)","DOI":"10.21437\/Interspeech.2012-504"},{"issue":"3","key":"480_CR20","doi-asserted-by":"crossref","first-page":"624","DOI":"10.1109\/TASL.2012.2229982","volume":"21","author":"JA Gonz\u00e1lez","year":"2013","unstructured":"J.A. Gonz\u00e1lez, A.M. Peinado, N. Ma, A.M. G\u00f3mez, J. Barker, MMSE-based missing-feature reconstruction with temporal modeling for robust speech recognition. IEEE Trans. Audio Speech Lang. Process. 21(3), 624\u2013635 (2013)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"480_CR21","doi-asserted-by":"crossref","unstructured":"R.C. Hendriks, R. Heusdens, J. Jensen, MMSE based noise PSD tracking with low complexity, in Proceedings of the ICASSP, pp. 4266\u20134269 (2010)","DOI":"10.1109\/ICASSP.2010.5495680"},{"key":"480_CR22","unstructured":"H.G. Hirsch, Experimental framework for the performance evaluation of speech recognition front-ends of large vocabulary task (Tech. rep, STQ AURORA DSR Working Group, 2002)"},{"key":"480_CR23","first-page":"181","volume":"2000","author":"HG Hirsch","year":"2000","unstructured":"H.G. Hirsch, D. Pearce, The Aurora experimental framework for the performance evaluations of speech recognitions systems under noise conditions. Proc. ISCA ITRW ASR 2000, 181\u2013188 (2000)","journal-title":"Proc. ISCA ITRW ASR"},{"key":"480_CR24","doi-asserted-by":"crossref","unstructured":"V. Leutnant, R. Haeb-Umbach, An analytic derivation of a phase-sensitive observation model for noise robust speech recognition, in Proceedings of the Interspeech, pp. 2395\u20132398 (2009)","DOI":"10.21437\/Interspeech.2009-367"},{"issue":"4","key":"480_CR25","doi-asserted-by":"crossref","first-page":"745","DOI":"10.1109\/TASLP.2014.2304637","volume":"22","author":"J Li","year":"2014","unstructured":"J. Li, L. Deng, Y. Gong, R. Haeb-Umbach, An overview of noise-robust automatic speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(4), 745\u2013777 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"480_CR26","volume-title":"Robust Automatic Speech Recognition: A Bridge to Practical Applications","author":"J Li","year":"2015","unstructured":"J. Li, L. Deng, R. Haeb-Umbach, Y. Gong, Robust Automatic Speech Recognition: A Bridge to Practical Applications (Academic Press, Cambridge, 2015)"},{"key":"480_CR27","doi-asserted-by":"crossref","DOI":"10.1201\/9781420015836","volume-title":"Speech Enhancement: Theory and Practice","author":"PC Loizou","year":"2007","unstructured":"P.C. Loizou, Speech Enhancement: Theory and Practice (CRC, Boca Raton, 2007)"},{"issue":"12","key":"480_CR28","doi-asserted-by":"crossref","first-page":"874","DOI":"10.1016\/j.specom.2007.05.003","volume":"49","author":"N Ma","year":"2007","unstructured":"N. Ma, P. Green, J. Barker, A. Coy, Exploiting correlogram structure for robust speech recognition with multiple speech sources. Speech Commun. 49(12), 874\u2013891 (2007)","journal-title":"Speech Commun."},{"issue":"5","key":"480_CR29","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1109\/89.928915","volume":"9","author":"R Martin","year":"2001","unstructured":"R. Martin, Noise power spectral density estimation based on optimal smoothing and minimum statistics. IEEE Trans. Speech Audio Process. 9(5), 504\u2013512 (2001)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"480_CR30","doi-asserted-by":"crossref","unstructured":"J.A. Morales-Cordovilla, N. Ma, V.E. S\u00e1nchez, J.L. Carmona, A.M. Peinado, J. Barker, A pitch based noise estimation technique for robust speech recognition with missing data, in Proceedings of the ICASSP, pp. 4808\u20134811 (2011)","DOI":"10.1109\/ICASSP.2011.5947431"},{"key":"480_CR31","unstructured":"P.J. Moreno, Speech recognition in noisy environments. Ph.D. thesis, Carnegie Mellon University (1996)"},{"issue":"10","key":"480_CR32","doi-asserted-by":"crossref","first-page":"1495","DOI":"10.1109\/29.35387","volume":"37","author":"A N\u00e1das","year":"1989","unstructured":"A. N\u00e1das, D. Nahamoo, M.A. Picheny, Speech recognition using noise-adaptive prototypes. IEEE Trans. Acoust. Speech Signal Process. 37(10), 1495\u20131503 (1989)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"480_CR33","doi-asserted-by":"crossref","unstructured":"T. Nakatani, T. Yoshioka, S. Araki, M. Delcroix, M. Fujimoto, Logmax observation model with MFCC-based spectral prior for reduction of highly nonstationary ambient noise, in Proceedings of the ICASSP, pp. 4029\u20134032 (2012)","DOI":"10.1109\/ICASSP.2012.6288802"},{"issue":"12","key":"480_CR34","doi-asserted-by":"crossref","first-page":"724","DOI":"10.1049\/el:20060510","volume":"42","author":"MH Radfar","year":"2006","unstructured":"M.H. Radfar, A.H. Banihashemi, R.M. Dansereau, A. Sayadiyan, Nonlinear minimum mean square error estimator for mixture-maximisation approximation. Electron. Lett. 42(12), 724\u2013725 (2006)","journal-title":"Electron. Lett."},{"issue":"4","key":"480_CR35","doi-asserted-by":"crossref","first-page":"275","DOI":"10.1016\/j.specom.2004.03.007","volume":"48","author":"B Raj","year":"2004","unstructured":"B. Raj, M.L. Seltzer, R.M. Stern, Reconstruction of missing features for robust speech recognition. Speech Commun. 48(4), 275\u2013296 (2004)","journal-title":"Speech Commun."},{"key":"480_CR36","doi-asserted-by":"crossref","unstructured":"B. Raj, R. Singh, Reconstructing spectral vectors with uncertain spectrographic masks for robust speech recognition, in Proceedings of the ASRU, pp. 65\u201370 (2005)","DOI":"10.1109\/ASRU.2005.1566472"},{"issue":"5","key":"480_CR37","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1109\/MSP.2005.1511828","volume":"22","author":"B Raj","year":"2005","unstructured":"B. Raj, R.M. Stern, Missing-feature approaches in speech recognition. IEEE Signal Process. Mag. 22(5), 101\u2013116 (2005)","journal-title":"IEEE Signal Process. Mag."},{"key":"480_CR38","volume-title":"Voice Activity Detection. Fundamentals and Speech Recognition System Robustness","author":"J Ram\u00edrez","year":"2007","unstructured":"J. Ram\u00edrez, J.M. G\u00f3rriz, J.C. Segura, Voice Activity Detection. Fundamentals and Speech Recognition System Robustness (INTECH Open Access Publisher, NewYork, 2007)"},{"issue":"3","key":"480_CR39","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1016\/j.specom.2003.10.002","volume":"42","author":"J Ram\u00edrez","year":"2004","unstructured":"J. Ram\u00edrez, J.C. Segura, C. Ben\u00edtez, A. De La Torre, A. Rubio, Efficient voice activity detection algorithms using long-term speech information. Speech Commun. 42(3), 271\u2013287 (2004)","journal-title":"Speech Commun."},{"key":"480_CR40","unstructured":"A.M. Reddy, B. Raj, Soft mask estimation for single channel speaker separation, in Workshop on Statistical and Perceptual Audio Processing SAPA (2004)"},{"issue":"6","key":"480_CR41","doi-asserted-by":"crossref","first-page":"1766","DOI":"10.1109\/TASL.2007.901310","volume":"15","author":"AM Reddy","year":"2007","unstructured":"A.M. Reddy, B. Raj, Soft mask methods for single-channel speaker separation. IEEE Trans. Audio Speech Lang. Process. 15(6), 1766\u20131776 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"480_CR42","doi-asserted-by":"crossref","unstructured":"U. Remes, Y. Nankaku, K. Tokuda, GMM-based missing-feature reconstruction on multi-frame windows, in Proceedings of the Interspeech, pp. 1665\u20131668 (2011)","DOI":"10.21437\/Interspeech.2011-214"},{"issue":"6","key":"480_CR43","first-page":"66","volume":"27","author":"SJ Rennie","year":"2010","unstructured":"S.J. Rennie, J.R. Hershey, P.A. Olsen, Single-channel multitalker speech recognition. IEEE Signal Process. Mag. 27(6), 66\u201380 (2010)","journal-title":"IEEE Signal Process. Mag."},{"key":"480_CR44","doi-asserted-by":"crossref","unstructured":"S.T. Roweis, Factorial models and refiltering for speech separation and denoising, in Proceedings of the Eurospeech, pp. 1009\u20131012 (2003)","DOI":"10.21437\/Eurospeech.2003-345"},{"key":"480_CR45","doi-asserted-by":"crossref","unstructured":"J.C. Segura, A. de\u00a0la Torre, M.C. Ben\u00edtez, A.M. Peinado, Model-based compensation of the additive noise for continuous speech recognition. Experiments using the Aurora II database and tasks, In Proceedings of the Eurospeech, pp. 221\u2013224 (2001)","DOI":"10.21437\/Eurospeech.2001-78"},{"key":"480_CR46","first-page":"433","volume":"1","author":"V Stouten","year":"2005","unstructured":"V. Stouten, H. Van Hamme, P. Wambacq, Effect of phase-sensitive environment model and higher order VTS on noisy speech feature enhancement. Proc. ICASSP 1, 433\u2013436 (2005)","journal-title":"Proc. ICASSP"},{"key":"480_CR47","doi-asserted-by":"crossref","unstructured":"A.P. Varga, R.K. Moore, Hidden Markov model decomposition of speech and noise, in Proceedings of the ICASSP, pp. 845\u2013848 (1990)","DOI":"10.1109\/ICASSP.1990.115970"},{"key":"480_CR48","doi-asserted-by":"crossref","unstructured":"T. Virtanen, R. Singh, B. Raj (eds.), Techniques for Noise Robustness in Automatic Speech Recognition (Wiley, Chichester, West Sussex, 2012)","DOI":"10.1002\/9781118392683"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00034-016-0480-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-016-0480-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-016-0480-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,21]],"date-time":"2024-06-21T16:22:17Z","timestamp":1718986937000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00034-016-0480-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,1,6]]},"references-count":48,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2017,9]]}},"alternative-id":["480"],"URL":"https:\/\/doi.org\/10.1007\/s00034-016-0480-7","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,1,6]]}}}