{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:51:56Z","timestamp":1740099116953,"version":"3.37.3"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319995786"},{"type":"electronic","value":"9783319995793"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-99579-3_47","type":"book-chapter","created":{"date-parts":[[2018,8,24]],"date-time":"2018-08-24T07:36:09Z","timestamp":1535096169000},"page":"451-460","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Building Real-Time Speech Recognition Without CMVN"],"prefix":"10.1007","author":[{"given":"Thai Son","family":"Nguyen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthias","family":"Sperber","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sebastian","family":"St\u00fcker","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alex","family":"Waibel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,25]]},"reference":[{"key":"47_CR1","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1007\/978-3-642-25020-0_32","volume-title":"Advances in Nonlinear Speech Processing","author":"MJ Alam","year":"2011","unstructured":"Alam, M.J., Ouellet, P., Kenny, P., O\u2019Shaughnessy, D.: Comparative evaluation of feature normalization techniques for speaker verification. In: Travieso-Gonz\u00e1lez, C.M., Alonso-Hern\u00e1ndez, J.B. (eds.) NOLISP 2011. LNCS (LNAI), vol. 7015, pp. 246\u2013253. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-25020-0_32"},{"key":"47_CR2","unstructured":"Cettolo, M., Niehues, J., St\u00fcker, S., Bentivogli, L., Frederico, M.: Report on the 10th IWSLT evaluation campaign. In: The International Workshop on Spoken Language Translation (IWSLT) 2013 (2013)"},{"issue":"4","key":"47_CR3","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"Dehak, N., Kenny, P.J., Dehak, R., Dumouchel, P., Ouellet, P.: Front-end factor analysis for speaker verification. IEEE Trans. Audio Speech Lang. Process. 19(4), 788\u2013798 (2011)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"47_CR4","unstructured":"Duda, R.O., Hart, P.E., Stork, D.G.: Pattern Classification. Wiley-Interscience (2000)"},{"key":"47_CR5","unstructured":"Federmann, C., Lewis, W.D.: Microsoft speech language translation (MSLT) corpus: the IWSLT 2016 release for English, French and German. In: The International Workshop on Spoken Language Translation (IWSLT) 2016 (2016)"},{"key":"47_CR6","doi-asserted-by":"crossref","unstructured":"Finke, M., Geutner, P., Hild, H., Kemp, T., Ries, K.R., Westphal, M.: The karlsruhe VERBMOBIL speech recognition engine. In: Proceedings of ICASSP (1997)","DOI":"10.1109\/ICASSP.1997.599552"},{"issue":"2","key":"47_CR7","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1109\/TASSP.1981.1163530","volume":"29","author":"S Furui","year":"1981","unstructured":"Furui, S.: Cepstral analysis technique for automatic speaker verification. IEEE Trans. Acoust. Speech Signal Process. 29(2), 254\u2013272 (1981)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"47_CR8","doi-asserted-by":"crossref","unstructured":"Gehring, J., Miao, Y., Metze, F., Waibel, A.: Extracting deep bottleneck features using stacked auto-encoders. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3377\u20133381. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638284"},{"key":"47_CR9","unstructured":"Graff, D.: The 1996 broadcast news speech and language-model corpus. In: Proceedings of the DARPA Workshop on Spoken Language Technology (1997)"},{"key":"47_CR10","unstructured":"Hannun, A., et al.: Deep speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567 (2014)"},{"key":"47_CR11","unstructured":"Jaitly, N., Hinton, G.E.: Vocal tract length perturbation (VTLP) improves speech recognition. In: Proceedings of ICML Workshop on Deep Learning for Audio, Speech and Language (2013)"},{"key":"47_CR12","doi-asserted-by":"crossref","unstructured":"Ko, T., Peddinti, V., Povey, D., Khudanpur, S.: Audio augmentation for speech recognition. In: INTERSPEECH, pp. 3586\u20133589 (2015)","DOI":"10.21437\/Interspeech.2015-711"},{"key":"47_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/978-3-642-35289-8_3","volume-title":"Neural Networks: Tricks of the Trade","author":"YA LeCun","year":"2012","unstructured":"LeCun, Y.A., Bottou, L., Orr, G.B., M\u00fcller, K.-R.: Efficient backprop. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. LNCS, vol. 7700, pp. 9\u201348. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-35289-8_3"},{"key":"47_CR14","doi-asserted-by":"crossref","unstructured":"Peddinti, V., Povey, D., Khudanpur, S.: A time delay neural network architecture for efficient modeling of long temporal contexts. In: INTERSPEECH, pp. 3214\u20133218 (2015)","DOI":"10.21437\/Interspeech.2015-647"},{"key":"47_CR15","unstructured":"Povey, D., et al.: The Kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding. IEEE Signal Processing Society, December 2011"},{"key":"47_CR16","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/978-3-319-52920-2_2","volume-title":"Analysis of Images, Social Networks and Texts","author":"T Prisyach","year":"2017","unstructured":"Prisyach, T., Mendelev, V., Ubskiy, D.: Data augmentation for training of noise robust acoustic models. In: Ignatov, D.I., et al. (eds.) AIST 2016. CCIS, vol. 661, pp. 17\u201325. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-52920-2_2"},{"key":"47_CR17","doi-asserted-by":"crossref","unstructured":"Pujol, P., Macho, D., Nadeu, C.: On real-time mean-and-variance normalization of speech recognition features. In: 2006 IEEE International Conference on Acoustics, Speech and Signal Processing, 2006. ICASSP 2006 Proceedings, vol. 1, p. I. IEEE (2006)","DOI":"10.1109\/ICASSP.2006.1660135"},{"key":"47_CR18","unstructured":"Rousseau, A., Del\u00e9glise, P., Est\u00e8ve, Y.: Enhancing the TED-LIUM corpus with selected data for language modeling and more TED talks. In: Proceedings of LREC (2014)"},{"key":"47_CR19","doi-asserted-by":"crossref","unstructured":"Sainath, T.N., Kingsbury, B., Mohamed, A.R., Ramabhadran, B.: Learning filter banks within a deep neural network framework. In: 2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 297\u2013302. IEEE (2013)","DOI":"10.1109\/ASRU.2013.6707746"},{"key":"47_CR20","doi-asserted-by":"crossref","unstructured":"Sainath, T.N., Mohamed, A.R., Kingsbury, B., Ramabhadran, B.: Deep convolutional neural networks for LVCSR. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8614\u20138618. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"47_CR21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23869-7_44","volume-title":"High Performance Computing in Science and Engineering \u201911","author":"S St\u00fcker","year":"2012","unstructured":"St\u00fcker, S., Kilgour, K., Kraft, F.: Quaero 2010 speech-to-text evaluation systems. In: Nagel, W., Kr\u00f6ner, D., Resch, M. (eds.) High Performance Computing in Science and Engineering \u201911. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-23869-7_44"},{"issue":"1","key":"47_CR22","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1016\/S0167-6393(98)00033-8","volume":"25","author":"O Viikki","year":"1998","unstructured":"Viikki, O., Laurila, K.: Cepstral domain segmental feature vector normalization for noise robust speech recognition. Speech Commun. 25(1), 133\u2013147 (1998)","journal-title":"Speech Commun."},{"key":"47_CR23","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., Manzagol, P.A.: Extracting and composing robust features with denoising autoencoders. In: The 25th International Conference on Machine Learning, pp. 1096\u20131103. ACM (2008)","DOI":"10.1145\/1390156.1390294"},{"key":"47_CR24","doi-asserted-by":"crossref","unstructured":"Yu, D., Seltzer, M.L.: Improved bottleneck features using pretrained deep neural networks. In: Interspeech, vol. 237, p. 240 (2011)","DOI":"10.21437\/Interspeech.2011-91"},{"key":"47_CR25","doi-asserted-by":"crossref","unstructured":"Zeiler, M.D., et al.: On rectified linear units for speech processing. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3517\u20133521. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638312"},{"key":"47_CR26","doi-asserted-by":"crossref","unstructured":"Zeyer, A., Schl\u00fcter, R., Ney, H.: Towards online-recognition with deep bidirectional LSTM acoustic models. In: Interspeech 2016, pp. 3424\u20133428 (2016)","DOI":"10.21437\/Interspeech.2016-759"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99579-3_47","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T05:00:43Z","timestamp":1661835643000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_47"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319995786","9783319995793"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99579-3_47","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}