{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T05:28:52Z","timestamp":1769318932989,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":24,"publisher":"Springer Singapore","isbn-type":[{"value":"9789811505942","type":"print"},{"value":"9789811505959","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,11,23]],"date-time":"2019-11-23T00:00:00Z","timestamp":1574467200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-981-15-0595-9_2","type":"book-chapter","created":{"date-parts":[[2019,11,22]],"date-time":"2019-11-22T17:02:36Z","timestamp":1574442156000},"page":"21-38","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Automatic Speech Recognition"],"prefix":"10.1007","author":[{"given":"Xugang","family":"Lu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masakiyo","family":"Fujimoto","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,11,23]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Xiong, W., Wu, L., Alleva, F., Droppo, J., Huang, X., Stolcke, A.: The Microsoft 2016 conversational speech recognition system. Microsoft Technical Report MSR-TR-2017-39. http:\/\/arxiv.org\/pdf\/1708.06073.pdf","DOI":"10.1109\/ICASSP.2017.7953159"},{"key":"2_CR2","unstructured":"Dixon, P.R., Hori, C., Kashioka, H.: Development of the SprinTra WFST speech decoder. NICT Res. J., 15\u201320 (2012)"},{"key":"2_CR3","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton, G., Deng, L., Yu, D., Dahl, G., Mohamed, A., Jaitly, N., Senior, A., Vanhoucke, V., Nguyen, P., Sainath, T., Kingsbury, B.: Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Signal Process. Mag. 29, 82\u201397 (2012)","journal-title":"IEEE Signal Process. Mag."},{"issue":"8","key":"2_CR4","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Cho, K., Merrienboer, B., Bahdanau, D., Bengio, Y.: On the properties of neural machine translation: encoder-decoder approaches. In: The 8th Workshop on Syntax, Semantics and Structure in Statistical Translation, SSST-8 (2014)","DOI":"10.3115\/v1\/W14-4012"},{"key":"2_CR6","doi-asserted-by":"publisher","unstructured":"Sainath, T.N., Vinyals, O., Senior, A., Sak, H.: Convolutional, long short-term memory, fully connected deep neural networks. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2015). https:\/\/doi.org\/10.1109\/icassp.2015.7178838","DOI":"10.1109\/icassp.2015.7178838"},{"key":"2_CR7","volume-title":"Approximation with artificial neural networks","author":"BC Cs\u00e1ji","year":"2001","unstructured":"Cs\u00e1ji, B.C.: Approximation with artificial neural networks. Faculty of Sciences; E\u00f6tv\u00f6s Lor\u00e1nd University, Hungary (2001)"},{"key":"2_CR8","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.: Imagenet classification with deep convolutional neural networks. In: Proceedings Advances in Neural Information Processing Systems (NIPS) (2012)"},{"key":"2_CR9","unstructured":"Srivastava, R.K., Greff, K., Schmidhuber, J.: Training very deep networks. In: Proceedings of NIPS (2015)"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Graves, A., Fernandez, S., Gomez, F., Shmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the International Conference on Machine Learning (ICML) (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"2_CR12","unstructured":"Graves, A., Jaitly, N.: Towards end-to-end speech recognition with recurrent neural networks. In: Proceedings of the International Conference on Machine Learning (ICML) (2014)"},{"key":"2_CR13","unstructured":"Chorowski, J., Bahdanau, D., Cho, K., Bengio, Y.: End-to-end continuous speech recognition using attention-based recurrent NN: First results. arXiv preprint arXiv:14121602 (2014)"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Miao, Y., Gowayyed, M., Metze, F.: EESEN: end-to-end speech recognition using deep RNN models and WFST-based decoding. In: Proceedings of IEEE-ASRU (2015)","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Kanda, N., Lu, X., Kawai, H.: Maximum a posteriori based decoding for CTC acoustic models. In: Proceedings of INTERSPEECH, pp. 1868\u20131872 (2016)","DOI":"10.21437\/Interspeech.2016-71"},{"issue":"2","key":"2_CR16","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"SF Boll","year":"1979","unstructured":"Boll, S.F.: Suppression of acoustic noise in speech using spectral subtraction. IEEE Trans. Audio Speech Signal Process. 27(2), 113\u2013120 (1979)","journal-title":"IEEE Trans. Audio Speech Signal Process."},{"key":"2_CR17","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Ephraim, Y., Malah, D.: Speech enhancement using a minimum mean-square error short-time spectral amplitude estimator. IEEE Trans. Audio Speech Signal Process. 32, 1109\u20131121 (1984)","journal-title":"IEEE Trans. Audio Speech Signal Process."},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Lu, X., Tsao, Y., Matsuda, S, Hori, C.: Speech enhancement based on deep denoising autoencoder. In: Proceedings of Interspeech \u201913, pp. 436\u2013440, August 2013","DOI":"10.21437\/Interspeech.2013-130"},{"issue":"10","key":"2_CR19","doi-asserted-by":"publisher","first-page":"2707","DOI":"10.1109\/TASL.2012.2210879","volume":"20","author":"T Yoshioka","year":"2012","unstructured":"Yoshioka, T., Nakatani, T.: Generalization of multi-channel linear prediction methods for blind MIMO impulse response shortening. IEEE Trans. Audio Speech Lang. Process. 20(10), 2707\u20132720 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"W\u00f6lfel, M., McDonough, M.: Minimum variance distortionless response spectral estimation. IEEE Signal Process. Mag. 22(5) (2005)","DOI":"10.1109\/MSP.2005.1511829"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Liao, H.: Speaker adaptation of context dependent deep neural networks. In: Proceedings of ICASSP \u201913, pp. 7947\u20137951, May 2013","DOI":"10.1109\/ICASSP.2013.6639212"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Seltzer, M., Yu, D., Wang, Y.: An investigation of deep neural networks for noise robust speech recognition. In: Proceedings of ICASSP \u201913, pp. 7398\u20137402, May 2013","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wang, D.: A joint training framework for robust automatic speech recognition. IEEE\/ACM Transa. Audio Speech Lang. Process. (2016)","DOI":"10.1109\/TASLP.2016.2528171"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Li, L., Sim, K.C.: Improving robustness of deep neural networks via spectral masking for automatic speech recognition. In: Proceedings of ASRU \u201913, pp. 279\u2013284, December 2013","DOI":"10.1109\/ASRU.2013.6707743"}],"container-title":["SpringerBriefs in Computer Science","Speech-to-Speech Translation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-15-0595-9_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,6]],"date-time":"2022-10-06T19:23:55Z","timestamp":1665084235000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-15-0595-9_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,23]]},"ISBN":["9789811505942","9789811505959"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-981-15-0595-9_2","relation":{},"ISSN":["2191-5768","2191-5776"],"issn-type":[{"value":"2191-5768","type":"print"},{"value":"2191-5776","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,11,23]]},"assertion":[{"value":"23 November 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}