{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:50:14Z","timestamp":1753602614365,"version":"3.41.0"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2015,8,5]],"date-time":"2015-08-05T00:00:00Z","timestamp":1438732800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["EURASIP J. Adv. Signal Process."],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1186\/s13634-015-0256-4","type":"journal-article","created":{"date-parts":[[2015,8,4]],"date-time":"2015-08-04T13:39:03Z","timestamp":1438695543000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Front-end technologies for robust ASR in reverberant environments\u2014spectral enhancement-based dereverberation and auditory modulation filterbank features"],"prefix":"10.1186","volume":"2015","author":[{"given":"Feifei","family":"Xiong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bernd T.","family":"Meyer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Niko","family":"Moritz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Rehr","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J\u00f6rn","family":"Anem\u00fcller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Timo","family":"Gerkmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simon","family":"Doclo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stefan","family":"Goetze","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,8,5]]},"reference":[{"key":"256_CR1","doi-asserted-by":"crossref","unstructured":"M W\u00f6lfel, J McDonough, Distant Speech Recognition (John Wiley & Sons Ltd, United Kingdom, 2009).","DOI":"10.1002\/9780470714089"},{"issue":"6","key":"256_CR2","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1109\/MSP.2012.2205029","volume":"29","author":"T Yoshioka","year":"2012","unstructured":"T Yoshioka, A Sehr, M Delcroix, K Kinoshita, R Maas, T Nakatani, W Kellermann, Making Machines Understand Us in Reverberant Rooms: Robustness against Reverberation for Automatic Speech Recognition. IEEE Signal Process. Mag.29(6), 114\u2013126 (2012).","journal-title":"IEEE Signal Process. Mag."},{"key":"256_CR3","doi-asserted-by":"crossref","unstructured":"EAP Habets, Single- and Multi-Microphone Speech Dereverberation using Spectral Enhancement. PhD thesis (University of Eindhoven, Eindhoven, The Netherlands, 2007).","DOI":"10.1109\/ACSSC.2008.5074521"},{"issue":"7","key":"256_CR4","doi-asserted-by":"publisher","first-page":"1717","DOI":"10.1109\/TASL.2010.2052251","volume":"18","author":"T Nakatani","year":"2010","unstructured":"T Nakatani, T Yoshioka, K Kinoshita, M Miyoshi, B-H Juang, Speech dereverberation based on variance-normalized delayed linear prediction. IEEE Trans. Audio, Speech, Lang. Process.18(7), 1717\u20131731 (2010).","journal-title":"IEEE Trans. Audio, Speech, Lang. Process."},{"issue":"9","key":"256_CR5","doi-asserted-by":"publisher","first-page":"1879","DOI":"10.1109\/TASL.2013.2260743","volume":"21","author":"I Kodrasi","year":"2013","unstructured":"I Kodrasi, S Goetze, S Doclo, Regularization for partial multichannel equalization for speech dereverberation. IEEE Trans. Audio, Speech Lang. Process.21(9), 1879\u20131890 (2013).","journal-title":"IEEE Trans. Audio, Speech Lang. Process."},{"key":"256_CR6","doi-asserted-by":"crossref","unstructured":"N Moritz, J Anem\u00fcller, B Kollmeier, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). Amplitude modulation spectrogram based features for robust speech recognition in noisy and reverberant environments (Prague, Czech Republic, 2011), pp. 5492\u20135495.","DOI":"10.1109\/ICASSP.2011.5947602"},{"issue":"7","key":"256_CR7","doi-asserted-by":"publisher","first-page":"1676","DOI":"10.1109\/TASL.2010.2050511","volume":"18","author":"A Sehr","year":"2010","unstructured":"A Sehr, R Maas, W Kellermann, Reverberation model-based decoding in the Logmelspec domain for robust distant-talking speech recognition. IEEE Trans. Audio, Speech Lang. Process.18(7), 1676\u20131691 (2010).","journal-title":"IEEE Trans. Audio, Speech Lang. Process."},{"key":"256_CR8","doi-asserted-by":"crossref","unstructured":"K Kinoshita, M Delcroix, T Yoshioka, T Nakatani, E Habets, R Haeb-Umbach, V Leutnant, A Sehr, W Kellermann, R Maas, S Gannot, B Raj, in IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA). The REVERB Challenge: a common evaluation framework for dereverberation and recognition of reverberant speech (New Paltz, NY, USA, 2013).","DOI":"10.1109\/WASPAA.2013.6701894"},{"key":"256_CR9","unstructured":"B Cauchi, I Kodrasi, R Rehr, S Gerlach, A Juki\u0107, T Gerkmann, S Doclo, S Goetze, in Proc. of the REVERB Challenge. Joint dereverberation and noise reduction using beamforming and a single-channel speech enhancement scheme (Florence, Italy, 2014)."},{"key":"256_CR10","unstructured":"F Weninger, S Watanabe, JL Roux, JR Hershey, Y Tachioka, J Geiger, B Schuller, G Rigoll, in Proc. of the REVERB Challenge. T MERL\/MELCO\/TUM System for the REVERB Challenge using Deep Recurrent Neural Network Feature Enhancement (Florence, Italy, 2014)."},{"key":"256_CR11","unstructured":"M Delcroix, T Yoshioka, A Ogawa, Y Kubo, M Fujimoto, N Ito, K Kinoshita, M Espi, T Hori, T Nakatani, A Nakamura, in Proc. of the REVERB Challenge. Linear prediction-based dereverberation with advanced speech enhancement and recognition technologies for the REVERB Challenge (Florence, Italy, 2014)."},{"key":"256_CR12","unstructured":"F Xiong, N Moritz, R Rehr, J Anem\u00fcller, BT Meyer, T Gerkmann, S Doclo, S Goetze, in Proc. of the REVERB Challenge. Robust ASR in reverberant environments using temporal cepstrum smoothing for speech enhancement and an amplitude modulation filterbank for feature extraction (Florence, Italy, 2014)."},{"key":"256_CR13","unstructured":"S Young, G Evermann, M Gales, T Hain, D Kershaw, XA Liu, G Moore, J Odell, D Ollason, D Povey, V Valtchev, P Woodland, The HTK Book (for HTK Version 3.4) (Cambridge University Engineering Department, Cambridge, 2009)."},{"key":"256_CR14","unstructured":"D Povey, A Ghoshal, G Boulianne, L Burget, O Glembek, N Goel, M Hannemann, P Motl\u00ed\u010dek, Y Qian, P Schwarz, J Silovsk\u00fd, G Stemmer, K Vesel\u00fd, in IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU). The Kaldi speech recognition toolkit (Big Island, HI, USA, 2011)."},{"key":"256_CR15","doi-asserted-by":"crossref","unstructured":"F Gr\u00e9zl, M Karafi\u00e1t, S Kont\u00e1ir, J \u010cernock\u00fd, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP), 4. Probabilistic and bottle-neck features for LVCSR of meetings (Honolulu, HI, USA, 2007), pp. 757\u2013760.","DOI":"10.1109\/ICASSP.2007.367023"},{"issue":"2","key":"256_CR16","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1016\/j.csl.2010.06.003","volume":"25","author":"D Povey","year":"2011","unstructured":"D Povey, L Burget, M Agarwal, P Akyazi, F Kai, A Ghoshal, O Glembek, N Goel, M Karafi\u00e1t, A Rastrow, RC Rose, P Schwarz, S Thomas, The subspace Gaussian mixture model - a structured model for speech recognition. Comput. Speech Lang.25(2), 404\u2013439 (2011).","journal-title":"Comput. Speech Lang."},{"issue":"6","key":"256_CR17","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"G Hinton, L Deng, D Yu, GE Dahl, A Mohamed, N Jaitly, A Senior, V Vanhoucke, P Nguyen, TN Sainath, B Kingsbury, Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Signal Process. Mag.29(6), 82\u201397 (2012).","journal-title":"IEEE Signal Process. Mag."},{"key":"256_CR18","unstructured":"A Sehr, Reverberation Modeling for Robust Distant-Talking Speech Recognition. PhD thesis, Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, (Germany, 2009)."},{"key":"256_CR19","unstructured":"C Breithaupt, R Martin, in ITG Conference on Voice Communication (Sprachkommunikation). DFT-based speech enhancement for robust automatic speech recognition (Aachen, Germany, 2008)."},{"key":"256_CR20","doi-asserted-by":"crossref","unstructured":"M Seltzer, D Yu, Y Wang, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). An Investigation of deep neural networks for noise robust speech recognition (Vancouver, Canada, 2013), pp. 7398\u20137402.","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"256_CR21","doi-asserted-by":"crossref","unstructured":"C Breithaupt, M Krawczyk, R Martin, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). Parameterized MMSE Spectral magnitude estimation for the enhancement of noisy speech (Las Vegas, NV, USA, 2008), pp. 4037\u20134040.","DOI":"10.1109\/ICASSP.2008.4518540"},{"issue":"5","key":"256_CR22","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1109\/89.928915","volume":"9","author":"R Martin","year":"2001","unstructured":"R Martin, Noise power spectral density estimation based on optimal smoothing and minimum statistics. IEEE Trans. Speech Audio Process.9(5), 504\u2013512 (2001).","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"11","key":"256_CR23","doi-asserted-by":"publisher","first-page":"4165","DOI":"10.1109\/TSP.2009.2025795","volume":"57","author":"T Gerkmann","year":"2009","unstructured":"T Gerkmann, R Martin, On the statistics of spectral amplitudes after variance reduction by temporal cepstrum smoothing and cepstral nulling. IEEE Trans. Signal Process.57(11), 4165\u20134174 (2009).","journal-title":"IEEE Trans. Signal Process."},{"issue":"3","key":"256_CR24","first-page":"359","volume":"87","author":"K Lebart","year":"2001","unstructured":"K Lebart, JM Boucher, PN Denbigh, A new method based on spectral subtraction for speech dereverberation. Acta Acustica United Acustica. 87(3), 359\u2013366 (2001).","journal-title":"Acta Acustica United Acustica"},{"key":"256_CR25","unstructured":"H Kuttruff, Room Acoustics, 4th edn (Spon Press, London, 2000)."},{"issue":"9","key":"256_CR26","doi-asserted-by":"publisher","first-page":"770","DOI":"10.1109\/LSP.2009.2024791","volume":"16","author":"EAP Habets","year":"2009","unstructured":"EAP Habets, S Gannot, I Cohen, Late reverberant spectral variance estimation based on a statistical model. IEEE Signal Process. Lett.16(9), 770\u2013773 (2009).","journal-title":"IEEE Signal Process. Lett."},{"key":"256_CR27","doi-asserted-by":"crossref","unstructured":"F Xiong, S Goetze, BT Meyer, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). Blind Estimation of Reverberation Time based on Spectro-Temporal Modulation Filtering (Vancouver, Canada, 2013), pp. 443\u2013447.","DOI":"10.1109\/ICASSP.2013.6637686"},{"key":"256_CR28","doi-asserted-by":"crossref","unstructured":"F Xiong, S Goetze, BT Meyer, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). Estimating room acoustic parameters for speech recognizer adaptation and combination in reverberant environments (Florence, Italy, 2014).","DOI":"10.1109\/ICASSP.2014.6854659"},{"issue":"11","key":"256_CR29","doi-asserted-by":"crossref","first-page":"1926","DOI":"10.1109\/TASLP.2015.2456420","volume":"23","author":"N Moritz","year":"2015","unstructured":"N Moritz, J Anem\u00fcller, B Kollmeier, An auditory inspired amplitude modulation filter bank for robust feature extraction in automatic speech recognition. IEEE Trans. Audio, Speech and Language Processing. 23(11), 1926\u20131937 (2015).","journal-title":"IEEE Trans. Audio, Speech and Language Processing"},{"issue":"6","key":"256_CR30","doi-asserted-by":"crossref","first-page":"1799","DOI":"10.1152\/jn.1988.60.6.1799","volume":"60","author":"G Langner","year":"1988","unstructured":"G Langner, CE Schreiner, Periodicity coding in the inferior colliculus of the Cat. I. Neuronal Mechanisms. J. Neurophysiol.60(6), 1799\u20131822 (1988).","journal-title":"J. Neurophysiol."},{"key":"256_CR31","doi-asserted-by":"crossref","unstructured":"N Mesgarani, S David, S Shamma, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP), 4. Representation of phonemes in primary auditory cortex: how the brain analyzes speech (Honolulu, HI, USA, 2007), pp. 765\u2013768.","DOI":"10.1109\/ICASSP.2007.367025"},{"issue":"5","key":"256_CR32","doi-asserted-by":"publisher","first-page":"2892","DOI":"10.1121\/1.420344","volume":"102","author":"T Dau","year":"1997","unstructured":"T Dau, B Kollmeier, A Kohlrausch, Modeling auditory processing of amplitude modulation. I, Detection and masking with narrow-band carriers. J. Acoust. Soc. Am.102(5), 2892\u20132905 (1997).","journal-title":"J. Acoust. Soc. Am."},{"issue":"5","key":"256_CR33","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1016\/j.specom.2010.07.002","volume":"53","author":"BT Meyer","year":"2011","unstructured":"BT Meyer, B Kollmeier, Robustness of spectro-temporal features against intrinsic and extrinsic variations in automatic speech recognition. Speech Commun.53(5), 753\u2013767 (2011).","journal-title":"Speech Commun."},{"issue":"4","key":"256_CR34","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"SB David","year":"1980","unstructured":"SB David, P Mermelstein, Comparison of parametric representation for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoustics, Speech Signal Process.28(4), 357\u2013366 (1980).","journal-title":"IEEE Trans. Acoustics, Speech Signal Process."},{"issue":"6","key":"256_CR35","doi-asserted-by":"publisher","first-page":"1304","DOI":"10.1121\/1.1914702","volume":"55","author":"B Atal","year":"1974","unstructured":"B Atal, Effectiveness of linear prediction characteristics of the speech wave for automatic speaker identification and verification. J. Acoust. Soc. Am.55(6), 1304\u20131322 (1974).","journal-title":"J. Acoust. Soc. Am."},{"key":"256_CR36","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1186\/s13634-015-0242-x","volume":"2015","author":"B Cauchi","year":"2015","unstructured":"B Cauchi, I Kodrasi, R Rehr, S Gerlach, A Juki\u0107, T Gerkmann, S Doclo, S Goetze, Combination of MVDR beamforming and single-channel spectral processing for enhancing noisy and reverberant speech. EURASIP Journal on Advances in Signal Processing. 2015, 61 (2015).","journal-title":"EURASIP Journal on Advances in Signal Processing"},{"key":"256_CR37","doi-asserted-by":"crossref","unstructured":"C Breithaupt, T Gerkmann, R Martin, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). A Novel A Priori SNR Estimation Approach based on Selective Cepstro-Temporal Smoothing (Las Vegas, NV, USA, 2008), pp. 4897\u20134900.","DOI":"10.1109\/ICASSP.2008.4518755"},{"issue":"6","key":"256_CR38","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Y Ephraim, D Malah, Speech enhancement using a minimum mean-square error short-time spectral amplitude estimator. IEEE Trans. Acoustics, Speech Signal Process.32(6), 1109\u20131121 (1984).","journal-title":"IEEE Trans. Acoustics, Speech Signal Process."},{"key":"256_CR39","unstructured":"H Meutzner, A Schlesinger, S Zeiler, D Kolossa, in Proc. 2nd CHiME Workshop on Machine Listening in Multisource Environments. Binaural signal processing for enhanced speech recognition robustness in complex listening environments (Vancouver, Canada, 2013), pp. 7\u201312."},{"key":"256_CR40","doi-asserted-by":"crossref","unstructured":"J Eaton, ND Gaubitch, PA Naylor, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). Noise-robust reverberation time estimation using spectral decay distributions with reduced computational cost (Vancouver, Canada, 2013), pp. 161\u2013165.","DOI":"10.1109\/ICASSP.2013.6637629"},{"key":"256_CR41","doi-asserted-by":"crossref","unstructured":"F Xiong, BT Meyer, S Goetze, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). A study on joint beamforming and spectral enhancement for robust speech recognition in reverberant environments (Brisbane, Australia, 2015), pp. 5043\u20135047.","DOI":"10.1109\/ICASSP.2015.7178931"},{"key":"256_CR42","unstructured":"C Breithaupt, Noise Reduction Algorithms for Speech Communications - Statistical Analysis and Improved Estimation Procedures. PhD thesis (Ruhr-Universit\u00e4t Bochum, Bochum, Germany, 2008)."},{"issue":"1","key":"256_CR43","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1007\/s002110100285","volume":"90","author":"KE Muller","year":"2001","unstructured":"KE Muller, Computing the Confluent Hypergeometric Function, M(a,b,x). Numerische Mathematik. 90(1), 179\u2013196 (2001).","journal-title":"Numerische Mathematik"},{"key":"256_CR44","doi-asserted-by":"crossref","unstructured":"R Maas, EAP Habets, A Sehr, W Kellermann, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). On the application of reverberation suppression to robust speech recognition (Kyoto, Japan, 2012), pp. 297\u2013300.","DOI":"10.1109\/ICASSP.2012.6287875"},{"key":"256_CR45","doi-asserted-by":"crossref","unstructured":"BT Meyer, SV Ravuri, MR Sch\u00e4dler, N Morgan, in Interspeech. Comparing Different Flavors of Spectro-Temporal Features for ASR (Florence, Italy, 2011), pp. 1269\u20131272.","DOI":"10.21437\/Interspeech.2011-103"},{"issue":"5","key":"256_CR46","doi-asserted-by":"publisher","first-page":"4134","DOI":"10.1121\/1.3699200","volume":"131","author":"MR Sch\u00e4dler","year":"2012","unstructured":"MR Sch\u00e4dler, BT Meyer, B Kollmeier, Spectro-temporal modulation subspace-spanning filter bank features for robust automatic speech recognition. J. Acoust. Soc. Am.131(5), 4134\u20134151 (2012).","journal-title":"J. Acoust. Soc. Am."},{"key":"256_CR47","unstructured":"S Haykin, Neural Networks and Learning Machines, 3rd edn (Prentice Hall, USA, 2008)."},{"key":"256_CR48","unstructured":"QuickNet package. http:\/\/wwwl.icsLberkeley.edu\/Speech\/qn.html ."},{"issue":"3","key":"256_CR49","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1121\/1.1909343","volume":"37","author":"MR Schroeder","year":"1965","unstructured":"MR Schroeder, New method of measuring reverberation time. J. Acoust. Soc. Amer.37(3), 409\u2013412 (1965).","journal-title":"J. Acoust. Soc. Amer."},{"key":"256_CR50","doi-asserted-by":"crossref","unstructured":"D Yu, ML Seltzer, in Proc. Interspeech. Improved Bottleneck Features using Pretrained Deep Neural Networks (Florence, Italy, 2011), pp. 237\u2013240.","DOI":"10.21437\/Interspeech.2011-91"},{"key":"256_CR51","unstructured":"JK Baker, Stochastic Modeling for Automatic Speech Recognition. Speech Recognition. (DR Reddy, ed.), (New York: Academic, 1975)."},{"issue":"2","key":"256_CR52","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1109\/TIT.1986.1057145","volume":"32","author":"BH Juang","year":"1986","unstructured":"BH Juang, S Levinson, M Sondhi, Maximum likelihood estimation for multivariate mixture observations of Markov chains. IEEE Trans. Inform. Theory. 32(2), 307\u2013309 (1986).","journal-title":"IEEE Trans. Inform. Theory"},{"key":"256_CR53","doi-asserted-by":"crossref","unstructured":"D Povey, K Yao, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). A basis method for robust estimation of constrained MLLR (Prague, Czech Republic, 2011), pp. 4460\u20134463.","DOI":"10.1109\/ICASSP.2011.5947344"},{"key":"256_CR54","doi-asserted-by":"crossref","unstructured":"D Povey, D Kanevsky, B Kingsbury, B Ramabhadran, G Saon, K Visweswariah, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). Boosted MMI for Model and Feature-Space Discriminative Training (Las Vegas, NV, USA, 2008), pp. 4057\u20134060.","DOI":"10.1109\/ICASSP.2008.4518545"},{"key":"256_CR55","doi-asserted-by":"crossref","unstructured":"M Gibson, T Hain, in Proc. Interspeech. Hypothesis spaces for minimum Bayes risk training in large vocabulary speech recognition (Pittsburgh, Pennsylvania, USA, 2006), pp. 2406\u20132409.","DOI":"10.21437\/Interspeech.2006-603"},{"key":"256_CR56","doi-asserted-by":"crossref","unstructured":"DE Rumelhart, GE Hinton, RJ Williams, Learning Internal Representations by Error Propagation. Parallel distributed processing: Explorations in the microstructure of cognition. 1: Foundations. MIT Press (1986). ISBN:0-262-68053-X.","DOI":"10.7551\/mitpress\/5236.001.0001"},{"issue":"1","key":"256_CR57","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"A Mohamed","year":"2012","unstructured":"A Mohamed, GE Dahl, G Hinton, Acoustic modeling using deep belief networks. IEEE Trans. Audio Speech Lang. Process.20(1), 14\u201322 (2012).","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"256_CR58","unstructured":"D Yu, ML Seltzer, J Li, J-T Huang, F Seide, in Proc. of ICLR. Feature learning in deep neural networks - studies on speech recognition tasks, (2013). arXiv:1301.3605v3."},{"key":"256_CR59","doi-asserted-by":"crossref","unstructured":"T Robinson, J Fransen, D Pye, J Foote, S Renals, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). WSJCAM0: A British English Speech Corpus for Large Vocabulary Continuous Speech Recognition (Detroit, Michigan, USA, 1995), pp. 81\u201384.","DOI":"10.1109\/ICASSP.1995.479278"},{"key":"256_CR60","doi-asserted-by":"crossref","unstructured":"M Lincoln, I McCowan, J Vepa, HK Maganti, in IEEE Workshop on Automatic Speech Recognition and Understanding. The Multi-Channel Wall Street Journal Audio Visual Corpus (MC-WSJ-AV): Specification and Initial Experiments (San Juan, Puerto Rico, 2005), pp. 357\u2013362.","DOI":"10.1109\/ASRU.2005.1566470"},{"key":"256_CR61","unstructured":"J Garofalo, D Graff, D Paul, D Pallett, in Linguistic Data Lconsortium (LDC). CSR-I (WSJ0) Complete (Philadelphia, USA, 2007)."},{"key":"256_CR62","doi-asserted-by":"crossref","unstructured":"RA Gopinath, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP), 2. Maximum Likelihood Modeling with Gaussian Distributions for Classification (Seattle, WA, USA, 1998), pp. 661\u2013664.","DOI":"10.1109\/ICASSP.1998.675351"},{"key":"256_CR63","unstructured":"Y Tachioka, T Narita, F Weninger, S Watanabe, in Proc. of the REVERB Challenge. Dual system combination approach for various reverberant environments with dereverberation Techniques (Florence, Italy, 2014)."},{"key":"256_CR64","doi-asserted-by":"crossref","unstructured":"F Gr\u00e9zl, P Fousek, in IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP). Optimizing Bottle-Neck Features for LVCSR (Las Vegas, NV, USA, 2008), pp. 4729\u20134732.","DOI":"10.1109\/ICASSP.2008.4518713"},{"key":"256_CR65","doi-asserted-by":"crossref","unstructured":"K Vesel\u00fd, A Ghosal, L Burget, D Povey, in Proc. Interspeech. Sequence-discriminative training of deep neural networks (Lyon, France, 2013), pp. 2345\u20132349.","DOI":"10.21437\/Interspeech.2013-548"},{"key":"256_CR66","doi-asserted-by":"crossref","unstructured":"J Li, D Yu, J-T Huang, Y Gong, in IEEE Workshop on Spoken Language Technology. Improving Wideband Speech Recognition using Mixed-Bandwidth Training Data in CD-DNN-HMM (Miami, FL, USA, 2012), pp. 131\u2013136.","DOI":"10.1109\/SLT.2012.6424210"},{"key":"256_CR67","doi-asserted-by":"crossref","unstructured":"Z T\u00fcske, P Golik, R Schl\u00fcter, H Ney, in Proc. Interspeech. Acoustic modeling with deep neural networks using raw time signal for LVCSR (Singapore, 2014), pp. 890\u2013894.","DOI":"10.21437\/Interspeech.2014-223"}],"container-title":["EURASIP Journal on Advances in Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13634-015-0256-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13634-015-0256-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13634-015-0256-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13634-015-0256-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T21:39:13Z","timestamp":1748554753000},"score":1,"resource":{"primary":{"URL":"https:\/\/asp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13634-015-0256-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,8,5]]},"references-count":67,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["256"],"URL":"https:\/\/doi.org\/10.1186\/s13634-015-0256-4","relation":{},"ISSN":["1687-6180"],"issn-type":[{"type":"electronic","value":"1687-6180"}],"subject":[],"published":{"date-parts":[[2015,8,5]]},"assertion":[{"value":"22 February 2015","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 July 2015","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 August 2015","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"70"}}