{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T05:34:40Z","timestamp":1677648880735},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2015,9,3]],"date-time":"2015-09-03T00:00:00Z","timestamp":1441238400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2016,5]]},"DOI":"10.1007\/s11042-015-2849-1","type":"journal-article","created":{"date-parts":[[2015,9,2]],"date-time":"2015-09-02T02:18:46Z","timestamp":1441160326000},"page":"5093-5108","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Combination of bottleneck feature extraction and dereverberation for distant-talking speech recognition"],"prefix":"10.1007","volume":"75","author":[{"given":"Bo","family":"Ren","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuma","family":"Ueda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Atsuhiko","family":"Kai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,9,3]]},"reference":[{"issue":"9","key":"2849_CR1","doi-asserted-by":"crossref","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid O, Mohamed A-r, Jiang H, Deng L, Penn G, Yu D (2014) Convolutional neural networks for speech recognition. IEEE\/ACM Trans Audio, Speech, Lang Process 22(9):1533\u20131545","journal-title":"IEEE\/ACM Trans Audio, Speech, Lang Process"},{"key":"2849_CR2","unstructured":"Evermann G, Woodland P C (2000) Posterior probability decoding, confidence estimation and system combination. In: Proc. Speech Transcr. Work., vol 27. Baltimore"},{"issue":"2","key":"2849_CR3","doi-asserted-by":"crossref","first-page":"254","DOI":"10.1109\/TASSP.1981.1163530","volume":"29","author":"S Furui","year":"1981","unstructured":"Furui S (1981) Cepstral analysis technique for automatic speaker verification. Acoust Speech Signal Process IEEE Trans 29(2):254\u2013272","journal-title":"Acoust Speech Signal Process IEEE Trans"},{"key":"2849_CR4","doi-asserted-by":"crossref","unstructured":"Gesbert D, Duhamel P (1997) Robust blind channel identification and equalization based on multi-step predictors. In: ICASSP, IEEE Int. Conf. Acoust. Speech Signal Process. - Proc., vol 5, pp 3621\u20133624","DOI":"10.1109\/ICASSP.1997.604650"},{"key":"2849_CR5","doi-asserted-by":"crossref","unstructured":"Gr\u00e9zl F, Fousek P (2008) Optimizing bottle-neck features for LVCSR. In: ICASSP, IEEE Int. Conf. Acoust. Speech Signal Process. - Proc., pp 4729\u20134732","DOI":"10.1109\/ICASSP.2008.4518713"},{"key":"2849_CR6","doi-asserted-by":"crossref","unstructured":"Gr\u00e9zl F, Karafi\u00e1t M, Kont\u00e1r S, \u010cernock\u00fd J (2007) Probabilistic and bottle-neck features for LVCSR of meetings. In: ICASSP, IEEE Int. Conf. Acoust. Speech Signal Process. - Proc, vol 4. IEEE, pp 757\u2013760","DOI":"10.1109\/ICASSP.2007.367023"},{"key":"2849_CR7","doi-asserted-by":"crossref","unstructured":"Hermansky H, Ellis D P W, Sharma S (2000) Tandem connectionist feature extraction for conventional HMM systems. In: ICASSP, IEEE Int. Conf. Acoust. Speech Signal Process. - Proc. IEEE, pp 1635\u20131638","DOI":"10.1109\/ICASSP.2000.862024"},{"issue":"November","key":"2849_CR8","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton G, Deng L, Yu D, Dahl G, Mohamed A R, Jaitly N, Senior A, Vanhoucke V, Nguyen P, Sainath T, Kingsbury B (2012) Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups. IEEE Signal Process Mag 29(November):82\u201397","journal-title":"IEEE Signal Process Mag"},{"issue":"July","key":"2849_CR9","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton G E, Salakhutdinov R R R (2006) Reducing the dimensionality of data with neural networks. Science 313(July):504\u2013507","journal-title":"Science"},{"key":"2849_CR10","doi-asserted-by":"crossref","unstructured":"Ishii T, Komiyama H, Shinozaki Y, Horiuchi T, Kuroiwa S (2013) Reverberant speech recognition based on denoising autoencoder. In: Proc. Annu. Conf. Int. Speech Commun. Assoc. INTERSPEECH, pp 3512\u20133516","DOI":"10.21437\/Interspeech.2013-267"},{"key":"2849_CR11","doi-asserted-by":"crossref","first-page":"534","DOI":"10.1109\/TASL.2008.2009015","volume":"17","author":"K Kinoshita","year":"2009","unstructured":"Kinoshita K, Delcroix M, Nakatani T, Miyoshi M (2009) Suppression of late reverberation effect on speech signal using long-term multiple-step linear prediction. IEEE Trans Audio, Speech Lang Process 17:534\u2013545","journal-title":"IEEE Trans Audio, Speech Lang Process"},{"key":"2849_CR12","doi-asserted-by":"crossref","unstructured":"Kinoshita K, Delcroix M, Yoshioka T, Nakatani T, Sehr A, Kellermann W, Maas R (2013) The reverb challenge: A common evaluation framework for dereverberation and recognition of reverberant speech. In: Appl. Signal Process. to Audio Acoust. (WASPAA), 2013 IEEE Work. IEEE, pp 1\u20134","DOI":"10.1109\/WASPAA.2013.6701894"},{"key":"2849_CR13","doi-asserted-by":"crossref","first-page":"2506","DOI":"10.1109\/TASL.2013.2277932","volume":"21","author":"P Lal","year":"2013","unstructured":"Lal P, King S (2013) Cross-lingual automatic speech recognition using tandem features. IEEE Trans Audio, Speech, Lang Process 21:2506\u20132515","journal-title":"IEEE Trans Audio, Speech, Lang Process"},{"key":"2849_CR14","unstructured":"Liang L, Renals S (2014) Probabilistic linear discriminant analysis with bottleneck features for speech recognition. In: Proc. Annu. Conf. Int. Speech Commun. Assoc. INTERSPEECH"},{"key":"2849_CR15","unstructured":"Liu F-H, Stern R M, Huang X, Acero A (1993) Efficient cepstral normalization for robust speech recognition, Proc. Work. Hum. Lang. Technol. - HLT \u201993"},{"issue":"3","key":"2849_CR16","first-page":"659","volume":"94","author":"W Longbiao","year":"2011","unstructured":"Longbiao W, Kitaoka N, Nakagawa S, Wang L, Kitaoka N, Nakagawa S (2011) Distant-talking speech recognition based on spectral subtraction by multi-channel LMS algorithm. IEICE Trans Inf Syst 94(3):659\u2013667","journal-title":"IEICE Trans Inf Syst"},{"key":"2849_CR17","doi-asserted-by":"crossref","unstructured":"Nguyen Q B, Gehring J, Muller M, Stuker S, Waibel A (2014) Multilingual shifting deep bottleneck features for low-resource ASR. In: ICASSP, IEEE Int. Conf. Acoust. Speech Signal Process. - Proc., pp 5607\u20135611","DOI":"10.1109\/ICASSP.2014.6854676"},{"key":"2849_CR18","doi-asserted-by":"crossref","unstructured":"Sainath T N (2012) Auto-encoder bottleneck features using deep belief networks. In: ICASSP, IEEE Int. Conf. Acoust. Speech Signal Process. - Proc., pp 4153\u2013 4156","DOI":"10.1109\/ICASSP.2012.6288833"},{"key":"2849_CR19","unstructured":"Sak H, Senior A, Beaufays F (2014) Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition. arXiv: 1402.1128"},{"key":"2849_CR20","doi-asserted-by":"crossref","unstructured":"Seide F, Li G, Yu D (2011) Conversational speech transcription using Context-Dependent Deep Neural Networks. In: Proc. Annu. Conf. Int. Speech Commun. Assoc. INTERSPEECH, pp 437\u2013440","DOI":"10.21437\/Interspeech.2011-169"},{"key":"2849_CR21","unstructured":"Sundermeyer M, Schl R, Ney H (2012) Context-Dependent MLPs for LVCSR : TANDEM, Hybrid or Both ?. In: Proc. Annu. Conf. Int. Speech Commun. Assoc. INTERSPEECH"},{"key":"2849_CR22","doi-asserted-by":"crossref","unstructured":"Ueda Y, Wang L, Kai A, Xiao X, Chng E, Li H (2015) Single-channel Dereverberation for Distant-Talking Speech Recognition by Combining Denoising Autoencoder and Temporal Structure Normalization, J. Signal Process. Syst.","DOI":"10.1109\/ISCSLP.2014.6936613"},{"key":"2849_CR23","doi-asserted-by":"crossref","unstructured":"Vincent P, Larochelle H, Bengio Y, Manzagol P-A (2008) Extracting and composing robust features with denoising autoencoders. In: Proc. 25th Int. Conf. Mach. Learn. ACM Press, pp 1096\u20131103","DOI":"10.1145\/1390156.1390294"},{"key":"2849_CR24","first-page":"3371","volume":"11","author":"P Vincent","year":"2010","unstructured":"Vincent P, Larochelle H, Lajoie I, Bengio Y, Manzagol P-A (2010) Stacked denoising autoencoders: learning useful representations in a deep network with a local denoising criterion. J Mach Learn Res 11:3371\u20133408","journal-title":"J Mach Learn Res"},{"key":"2849_CR25","doi-asserted-by":"crossref","unstructured":"Wang L, Bo R, Ueda Y, Kai A, Teraoka S, Fukushima T (2014) Denoising autoencoder and environment adaptation for distant-talking speech recognition with asynchronous speech recording. In: APSIPA ASC","DOI":"10.1109\/APSIPA.2014.7041548"},{"key":"2849_CR26","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/1687-6180-2012-1","volume":"2012","author":"L Wang","year":"2012","unstructured":"Wang L, Odani K, Kai A (2012) Dereverberation and denoising based on generalized spectral subtraction by multi-channel LMS algorithm using a small-scale microphone array. EURASIP J Adv Signal Process 2012:1\u201312","journal-title":"EURASIP J Adv Signal Process"},{"key":"2849_CR27","doi-asserted-by":"crossref","unstructured":"Xie X, Su R, Liu X, Wang L (2014) Deep neural network bottleneck features for generalized variable parameter HMMs. In: Proc. Annu. Conf. Int. Speech Commun. Assoc. INTERSPEECH. ISCA, pp 2739\u20132743","DOI":"10.21437\/Interspeech.2014-158"},{"key":"2849_CR28","doi-asserted-by":"crossref","unstructured":"Yamada T, Wang L, Kai A (2013) Improvement of distant-talking speaker identification using bottleneck features of DNN. In: INTERSPEECH, pp 3661\u20133664","DOI":"10.21437\/Interspeech.2013-686"},{"key":"2849_CR29","unstructured":"Yu D, Deng L, Dahl G E (2010) Roles of pretraining and fine-tuning in context-dependent DBN-HMMs for real-world speech recognition. In: NIPS Work. Deep Learn. Unsupervised Featur. Learn."},{"key":"2849_CR30","doi-asserted-by":"crossref","unstructured":"Yu D, Seltzer M L (2011) Improved Bottleneck Features Using Pretrained Deep Neural Networks. In: Proc. Annu. Conf. Int. Speech Commun. Assoc. INTERSPEECH, pp 237\u2013240","DOI":"10.21437\/Interspeech.2011-91"},{"issue":"1","key":"2849_CR31","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1186\/1687-4722-2014-15","volume":"2014","author":"Z Zhang","year":"2014","unstructured":"Zhang Z, Wang L, Kai A (2014) Distant-talking speaker identification by generalized spectral subtraction-based dereverberation and its efficient computation. EURASIP J Audio, Speech, Music Process 2014(1):15","journal-title":"EURASIP J Audio, Speech, Music Process"},{"issue":"1","key":"2849_CR32","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1186\/s13636-015-0056-7","volume":"2015","author":"Z Zhang","year":"2015","unstructured":"Zhang Z, Wang L, Kai A, Yamada T, Li W, Iwahashi M (2015) Deep neural network-based bottleneck feature and denoising autoencoder-based dereverberation for distant-talking speaker identification. EURASIP J Audio, Speech, Music Process 2015(1):12","journal-title":"EURASIP J Audio, Speech, Music Process"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-015-2849-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-015-2849-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-015-2849-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,21]],"date-time":"2022-05-21T06:01:05Z","timestamp":1653112865000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-015-2849-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,9,3]]},"references-count":32,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2016,5]]}},"alternative-id":["2849"],"URL":"https:\/\/doi.org\/10.1007\/s11042-015-2849-1","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,9,3]]}}}