{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T17:48:14Z","timestamp":1760204894486,"version":"3.41.0"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319529196"},{"type":"electronic","value":"9783319529202"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-52920-2_2","type":"book-chapter","created":{"date-parts":[[2017,2,16]],"date-time":"2017-02-16T18:13:40Z","timestamp":1487268820000},"page":"17-25","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Data Augmentation for Training of Noise Robust Acoustic Models"],"prefix":"10.1007","author":[{"given":"Tatiana","family":"Prisyach","sequence":"first","affiliation":[]},{"given":"Valentin","family":"Mendelev","sequence":"additional","affiliation":[]},{"given":"Dmitry","family":"Ubskiy","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,17]]},"reference":[{"key":"2_CR1","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton, G., Deng, L., Yu, D., Dahl, G.E.: Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Sig. Process. Mag. 29, 82\u201397 (2012)","journal-title":"IEEE Sig. Process. Mag."},{"key":"2_CR2","first-page":"105","volume":"12","author":"S Yaman","year":"2012","unstructured":"Yaman, S., Pelecanos, J.W., Sarikaya, R.: Bottleneck features for speaker recognition. Odyssey 12, 105\u2013108 (2012)","journal-title":"Odyssey"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Ragni, A., Knill, K.M., Rath, S.P., Gales, M.J.F.: Data augmentation for low resource languages. In: Proceedings of Interspeech 2014, pp. 810\u2013814 (2014)","DOI":"10.21437\/Interspeech.2014-207"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Kim, C., Stern, R.M.: Feature extraction for robust speech recognition based on maximizing the sharpness of the power distribution and on power flooring. In: Proceedings of ICASSP 2010, pp. 4574\u20134577 (2010)","DOI":"10.1109\/ICASSP.2010.5495570"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Morgan, N., Bayya, A., Kohn, P.: Compensation for the effect of communication channel in auditory-like analysis of speech (RASTA-PLP). In: Proceedings of European Conference on Speech Technology 1991, pp. 1367\u20131370 (1991)","DOI":"10.21437\/Eurospeech.1991-312"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Viikki, O., Bye, D., Laurila, K.: A recursive feature vector normalization approach for robust speech recognition in noise. In: Proceedings of ICASSP 1998, pp. 733\u2013736 (1998)","DOI":"10.1109\/ICASSP.1998.675369"},{"issue":"2","key":"2_CR7","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"F Boll","year":"1979","unstructured":"Boll, F.: Suppression of acoustic noise in speech using spectral subtraction. IEEE T-ASSP 27(2), 113\u2013120 (1979)","journal-title":"IEEE T-ASSP"},{"key":"2_CR8","unstructured":"Mauuary, L.: Blind equalization in the cepstral domain for robust telephone based speech recognition. In: Proceedings of EUSPICO 1998, vol. 1, pp. 359\u2013363 (1998)"},{"issue":"2","key":"2_CR9","first-page":"291","volume":"2","author":"J-L Gauvain","year":"1994","unstructured":"Gauvain, J.-L., Lee, C.-H.: Maximum a posteriori estimation of multivariate Gaussian mixture observations of Markov chains. IEEE T-SAP 2(2), 291\u2013298 (1994)","journal-title":"IEEE T-SAP"},{"key":"2_CR10","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1006\/csla.1998.0043","volume":"12","author":"MJF Gales","year":"1998","unstructured":"Gales, M.J.F.: Maximum likelihood linear transformations for HMM-based speech recognition. Comput. Speech Lang. 12, 75\u201398 (1998)","journal-title":"Comput. Speech Lang."},{"key":"2_CR11","unstructured":"Deng, L., Acero, A., Jiang, L., Droppo, J., Huang, X.D.: High-performance robust speech recognition using stereo training data. In: Proceedings of ICASSP 2001, pp. 301\u2013304 (2001)"},{"issue":"4","key":"2_CR12","first-page":"417","volume":"8","author":"MJF Gales","year":"2000","unstructured":"Gales, M.J.F.: Cluster adaptive training of hidden Markov models. IEEE T-SAP 8(4), 417\u2013428 (2000)","journal-title":"IEEE T-SAP"},{"key":"2_CR13","unstructured":"Lee, D.D., Seung, H.S.: Algorithms for non-negative matrix factorization. In: Proceedings of NIPS 2000, pp. 556\u2013562 (2000)"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Deng, J., Li, L., Yu, D., Gong, Y., Acero, A.: High-performance HMM adaptation with joint compensation of additive and convolutive distortions via vector Taylor series. In: Proceedings of ASRU 2007, pp. 65\u201370 (2007)","DOI":"10.1109\/ASRU.2007.4430085"},{"key":"2_CR15","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1006\/csla.2001.0186","volume":"16","author":"L Lamel","year":"2002","unstructured":"Lamel, L., Gauvain, J.-L.: Lightly supervised and unsupervised acoustic model training. Comput. Speech Lang. 16, 115\u2013129 (2002)","journal-title":"Comput. Speech Lang."},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Gales, M.J.F., Ragni, A., AlDamarki, H., Gautier, C.: Support vector machines for noise robust ASR. In: Proceedings of ASRU 2009, pp. 205\u2013210 (2009)","DOI":"10.1109\/ASRU.2009.5372913"},{"key":"2_CR17","unstructured":"Jaitly, N., Hinton, G.E.: Vocal tract length perturbation (VTLP) improves speech recognition. In: Proceedings of ICML 2013 (2013)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Burget, L., Schwarz, P., Agarwal, M., Akyazi, P.: Multilingual acoustic modeling for speech recognition based on subspace Gaussian mixture models. In: Proceedings of ICASSP 2010, pp. 4334\u20134337 (2010)","DOI":"10.1109\/ICASSP.2010.5495646"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Ko, T., Peddinti, V., Povey, D., Khudanpur, S.: Audio augmentation for speech recognition. In: Proceedings of Interspeech 2015 (2015)","DOI":"10.21437\/Interspeech.2015-711"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Cui, X., Goel, V., Kingsbury, B.: Data augmentation for deep neural network acoustic modeling. In: Proceedings of ICASSP 2014 (2014)","DOI":"10.1109\/ICASSP.2014.6854671"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Jeub, M., Schaefer, M., Vary, P.: A binaural room impulse response database for the evaluation of dereverberation algorithms. In: Proceedings of 16th International Conference on Digital Signal Processing (DSP), Santorini, Greece (2009)","DOI":"10.1109\/ICDSP.2009.5201259"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Peddinti, V., Chen, G., Povey, D., Khudanpur, S.L.: Reverberation robust acoustic modeling using i-vectors with time delay neural networks. In: Proceedings of Interspeech 2015, pp. 2440\u20132444 (2015)","DOI":"10.21437\/Interspeech.2015-527"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Yu, D., Seltzer, M.L.: Improved bottleneck features using pretrained deep neural networks. In: Proceedings of Interspeech 2011, pp. 237\u2013240 (2011)","DOI":"10.21437\/Interspeech.2011-91"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Karafi\u00e1t, M., Gr\u00e9zl, F., Burget, L., Sz\u0151ke, I., \u010cernosk\u00fd, J.: Three ways to adapt a CTS recognizer to unseen reverberated speech in BUT system for the ASpIRE challenge. In: Proceedings of Interspeech 2015, pp. 2454\u20132458 (2015)","DOI":"10.21437\/Interspeech.2015-530"},{"issue":"9","key":"2_CR25","doi-asserted-by":"publisher","first-page":"1215","DOI":"10.1109\/5.237532","volume":"81","author":"JW Picone","year":"1993","unstructured":"Picone, J.W.: Signal modeling techniques in speech recognition. Proc. IEEE 81(9), 1215\u20131247 (1993)","journal-title":"Proc. IEEE"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Dean, D.B., Kanagasundaram, A., Ghaemmaghami, H., Rahman, M., Sridharan, S.: The QUT-NOISE-SRE protocol for the evaluation of noisy speaker recognition. In: Proceedings of the 16th Annual Conference of the International Speech Communication Association, Interspeech 2015, pp. 3456\u20133460 (2015)","DOI":"10.21437\/Interspeech.2015-685"},{"key":"2_CR27","unstructured":"Poll\u00e1k, P.: Efficient and reliable measurement and simulation of noisy speech background. In: 2002 11th European Signal Processing Conference, pp. 1\u20134 (2002)"},{"key":"2_CR28","unstructured":"L\u00f6llmann, H.W., Yilmaz, E., Jeub, M., Vary, P.: An improved algorithm for blind reverberation time estimation. In: Proceedings of International Workshop on Acoustic Echo and Noise Control (IWAENC) (2010)"},{"issue":"1","key":"2_CR29","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1109\/TASL.2006.876778","volume":"15","author":"E McDermott","year":"2007","unstructured":"McDermott, E., Hazen, T., Roux, J.L., Nakamura, A., Katagiri, S.: Discriminative training for large vocabulary speech recognition using minimum classification error. IEEE Trans. Speech Audio Process 15(1), 203\u2013223 (2007)","journal-title":"IEEE Trans. Speech Audio Process"}],"container-title":["Communications in Computer and Information Science","Analysis of Images, Social Networks and Texts"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-52920-2_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T12:33:07Z","timestamp":1749990787000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-52920-2_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319529196","9783319529202"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-52920-2_2","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"17 February 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIST","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Analysis of Images, Social Networks and Texts","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Yekaterinburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 April 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 April 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aist2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/aistconf.org\/2016\/en","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}