{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T01:10:01Z","timestamp":1751850601495,"version":"3.41.0"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030007935"},{"type":"electronic","value":"9783030007942"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-00794-2_35","type":"book-chapter","created":{"date-parts":[[2018,9,7]],"date-time":"2018-09-07T19:50:24Z","timestamp":1536349824000},"page":"324-333","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Robust Recognition of Conversational Telephone Speech via Multi-condition Training and Data Augmentation"],"prefix":"10.1007","author":[{"given":"Ji\u0159\u00ed","family":"M\u00e1lek","sequence":"first","affiliation":[]},{"given":"Jind\u0159ich","family":"\u017dd\u00e1nsk\u00fd","sequence":"additional","affiliation":[]},{"given":"Petr","family":"\u010cerva","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,8]]},"reference":[{"key":"35_CR1","unstructured":"Amodei, D., et al.: Deep speech 2: End-to-end speech recognition in English and Mandarin. In: International Conference on Machine Learning, pp. 173\u2013182 (2016)"},{"key":"35_CR2","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1016\/j.specom.2016.11.007","volume":"86","author":"M Borsky","year":"2017","unstructured":"Borsky, M., Mizera, P., Pollak, P., Nouza, J.: Dithering techniques in automatic recognition of speech corrupted by MP3 compression: analysis, solutions and experiments. Speech Commun. 86, 75\u201384 (2017)","journal-title":"Speech Commun."},{"issue":"1","key":"35_CR3","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1186\/s13636-015-0064-7","volume":"2015","author":"M Borsky","year":"2015","unstructured":"Borsky, M., Pollak, P., Mizera, P.: Advanced acoustic modelling techniques in MP3 speech recognition. EURASIP J. Audio Speech Music Process. 2015(1), 20 (2015)","journal-title":"EURASIP J. Audio Speech Music Process."},{"issue":"9","key":"35_CR4","doi-asserted-by":"publisher","first-page":"1469","DOI":"10.1109\/TASLP.2015.2438544","volume":"23","author":"X Cui","year":"2015","unstructured":"Cui, X., Goel, V., Kingsbury, B.: Data augmentation for deep neural network acoustic modeling. IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP) 23(9), 1469\u20131477 (2015)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP)"},{"issue":"1","key":"35_CR5","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"G Dahl","year":"2012","unstructured":"Dahl, G., Yu, D., Deng, L., Acero, A.: Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition. IEEE Trans. Audio Speech Lang. Process. 20(1), 30\u201342 (2012). https:\/\/doi.org\/10.1109\/TASL.2011.2134090","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"35_CR6","unstructured":"FFmpeg team: Ffmpeg - cross-platform solution to record, convert and stream audio and video. Software version: 20170525\u2013b946bd8. https:\/\/www.ffmpeg.org\/"},{"key":"35_CR7","doi-asserted-by":"crossref","unstructured":"Fraga-Silva, T., et al.: Active learning based data selection for limited resource STT and KWS. In: Sixteenth Annual Conference of the International Speech Communication Association (2015)","DOI":"10.21437\/Interspeech.2015-636"},{"key":"35_CR8","doi-asserted-by":"crossref","unstructured":"Fraga-Silva, T., et al.: Improving data selection for low-resource STT and KWS. In: 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 153\u2013159. IEEE (2015)","DOI":"10.1109\/ASRU.2015.7404788"},{"key":"35_CR9","unstructured":"Garofolo, J.S., et al.: TIMIT acoustic-phonetic continuous speech corpus. Linguist. Data Consortium, 10(5) (1993)"},{"key":"35_CR10","unstructured":"Jaitly, N., Hinton, G.E.: Vocal tract length perturbation (VTLP) improves speech recognition. In: Proceeding of the ICML Workshop on Deep Learning for Audio, Speech and Language, pp. 625\u2013660 (2013)"},{"key":"35_CR11","doi-asserted-by":"crossref","unstructured":"Kanda, N., Takeda, R., Obuchi, Y.: Elastic spectral distortion for low resource speech recognition with deep neural networks. In: 2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 309\u2013314. IEEE (2013)","DOI":"10.1109\/ASRU.2013.6707748"},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Kemp, T., Waibel, A.: Unsupervised training of a speech recognizer: recent experiments. In: Eurospeech (1999)","DOI":"10.21437\/Eurospeech.1999-599"},{"issue":"1","key":"35_CR13","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1186\/s13634-016-0306-6","volume":"2016","author":"K Kinoshita","year":"2016","unstructured":"Kinoshita, K., et al.: A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research. EURASIP J. Adv. Signal Process. 2016(1), 7 (2016)","journal-title":"EURASIP J. Adv. Signal Process."},{"key":"35_CR14","doi-asserted-by":"crossref","unstructured":"Kneser, R., Ney, H.: Improved backing-off for m-gram language modeling. In: 1995 International Conference on Acoustics, Speech, and Signal Processing 1995, ICASSP 1995, vol. 1, pp. 181\u2013184. IEEE (1995)","DOI":"10.1109\/ICASSP.1995.479394"},{"key":"35_CR15","doi-asserted-by":"crossref","unstructured":"Ko, T., Peddinti, V., Povey, D., Khudanpur, S.: Audio augmentation for speech recognition. In: INTERSPEECH, pp. 3586\u20133589 (2015)","DOI":"10.21437\/Interspeech.2015-711"},{"issue":"4","key":"35_CR16","doi-asserted-by":"publisher","first-page":"745","DOI":"10.1109\/TASLP.2014.2304637","volume":"22","author":"J Li","year":"2014","unstructured":"Li, J., Deng, L., Gong, Y., Haeb-Umbach, R.: An overview of noise-robust automatic speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(4), 745\u2013777 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"35_CR17","doi-asserted-by":"crossref","unstructured":"Ma, J., Schwartz, R.: Unsupervised versus supervised training of acoustic models. In: Ninth Annual Conference of the International Speech Communication Association (2008)","DOI":"10.21437\/Interspeech.2008-122"},{"issue":"5","key":"35_CR18","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1109\/79.536825","volume":"13","author":"RJ Mammone","year":"1996","unstructured":"Mammone, R.J., Zhang, X., Ramachandran, R.P.: Robust speaker recognition: a feature-based approach. IEEE Signal Process. Mag. 13(5), 58 (1996)","journal-title":"IEEE Signal Process. Mag."},{"key":"35_CR19","doi-asserted-by":"crossref","unstructured":"Polacky, J., Jarina, R., Chmulik, M.: Assessment of automatic speaker verification on lossy transcoded speech. In: 2016 4th International Workshop on Biometrics and Forensics (IWBF), pp. 1\u20136. IEEE (2016)","DOI":"10.1109\/IWBF.2016.7449679"},{"key":"35_CR20","doi-asserted-by":"crossref","unstructured":"Raghavan, S., et al.: A comparative study on the effect of different codecs on speech recognition accuracy using various acoustic modeling techniques. In: 2017 Twenty-third National Conference on Communications (NCC), pp. 1\u20136. IEEE (2017)","DOI":"10.1109\/NCC.2017.8077042"},{"key":"35_CR21","doi-asserted-by":"crossref","unstructured":"Seltzer, M.L., Yu, D., Wang, Y.: An investigation of deep neural networks for noise robust speech recognition. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7398\u20137402. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"35_CR22","unstructured":"Siegert, I., Lotz, A.F., Maruschke, M., Jokisch, O., Wendemuth, A.: Emotion intelligibility within codec-compressed and reduced bandwidth speech. In: ITG Symposium, Proceedings of Speech Communication, vol. 12, pp. 1\u20135. VDE (2016)"},{"key":"35_CR23","unstructured":"Torch team: Torch - a scientific computing framework for luajit. http:\/\/torch.ch"},{"key":"35_CR24","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1016\/j.csl.2016.11.005","volume":"46","author":"E Vincent","year":"2016","unstructured":"Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R.: An analysis of environment, microphone and data simulation mismatches in robust speech recognition. Comput. Speech Lang. 46, 535\u2013557 (2016)","journal-title":"Comput. Speech Lang."},{"key":"35_CR25","doi-asserted-by":"crossref","unstructured":"Xiong, W., et al.: The Microsoft 2016 conversational speech recognition system. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5255\u20135259. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7953159"},{"key":"35_CR26","first-page":"2","volume":"2","author":"S Young","year":"1994","unstructured":"Young, S., Young, S.: The HTK hidden Markov model toolkit: design and philosophy. Entrop. Cambridge Res. Lab. Ltd. 2, 2\u201344 (1994)","journal-title":"Entrop. Cambridge Res. Lab. Ltd."}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-00794-2_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T00:28:48Z","timestamp":1751848128000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-00794-2_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030007935","9783030007942"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-00794-2_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"8 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}