{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T20:37:21Z","timestamp":1725914241496},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319701356"},{"type":"electronic","value":"9783319701363"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-70136-3_91","type":"book-chapter","created":{"date-parts":[[2017,10,25]],"date-time":"2017-10-25T06:33:43Z","timestamp":1508913223000},"page":"859-869","source":"Crossref","is-referenced-by-count":2,"title":["Word-Level Permutation and Improved Lower Frame Rate for RNN-Based Acoustic Modeling"],"prefix":"10.1007","author":[{"given":"Yuanyuan","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shiyu","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuang","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,26]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Graves, A., Jaitly, N., Mohamed, A.: Hybrid speech recognition with deep bidirectional LSTM. In: 2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 273\u2013278. IEEE (2013)","key":"91_CR1","DOI":"10.1109\/ASRU.2013.6707742"},{"doi-asserted-by":"crossref","unstructured":"Sak, H., Senior, A., Beaufays, F.: Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition. In: Interspeech (2014)","key":"91_CR2","DOI":"10.21437\/Interspeech.2014-80"},{"doi-asserted-by":"crossref","unstructured":"Sak, H., Senior, A., Rao, K., Beaufays, F.: Fast and accurate recurrent neural network acoustic models for speech recognition. arXiv preprint arXiv:1507.06947 (2015)","key":"91_CR3","DOI":"10.21437\/Interspeech.2015-350"},{"doi-asserted-by":"crossref","unstructured":"Sak, H., Senior, A., Rao, K., Irsoy, O., Graves, A., Beaufays, F., Schalkwyk, J.: Learning acoustic frame labeling for speech recognition with recurrent neural networks. In: Acoustics, Speech and Signal Processing (ICASSP), pp. 4280\u20134284 (2015)","key":"91_CR4","DOI":"10.1109\/ICASSP.2015.7178778"},{"key":"91_CR5","doi-asserted-by":"crossref","first-page":"22","DOI":"10.21437\/Interspeech.2016-275","volume":"2016","author":"G Pundak","year":"2016","unstructured":"Pundak, G., Sainath, T.N.: Lower frame rate neural network acoustic models. Interspeech 2016, 22\u201326 (2016)","journal-title":"Interspeech"},{"key":"91_CR6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2","volume-title":"Supervised Sequence Labelling with Recurrent Neural Networks","author":"A Graves","year":"2012","unstructured":"Graves, A.: Supervised Sequence Labelling with Recurrent Neural Networks. Springer, Heidelberg (2012). doi: 10.1007\/978-3-642-24797-2"},{"doi-asserted-by":"crossref","unstructured":"Soltau, H., Liao, H., Sak, H.: Neural speech recognizer: acoustic-to-word LSTM model for large vocabulary speech recognition. arXiv preprint arXiv:1610.09975 (2016)","key":"91_CR7","DOI":"10.21437\/Interspeech.2017-1566"},{"doi-asserted-by":"crossref","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning internal representations by error propagation. Technical report, DTIC Document (1985)","key":"91_CR8","DOI":"10.21236\/ADA164453"},{"key":"91_CR9","doi-asserted-by":"crossref","first-page":"490","DOI":"10.1162\/neco.1990.2.4.490","volume":"2","author":"RJ Williams","year":"1990","unstructured":"Williams, R.J., Peng, J.: An efficient gradient-based algorithm for on-line training of recurrent network trajectories. Neural Comput. 2, 490\u2013501 (1990)","journal-title":"Neural Comput."},{"doi-asserted-by":"crossref","unstructured":"K\u00fchnert, B., Nolan, F.: The origin of coarticulation. In: Coarticulation: Theory, Data and Techniques, pp. 7\u201330 (1999)","key":"91_CR10","DOI":"10.1017\/CBO9780511486395.002"},{"issue":"8","key":"91_CR11","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"91_CR12","doi-asserted-by":"crossref","first-page":"1868","DOI":"10.21437\/Interspeech.2016-71","volume":"2016","author":"N Kanda","year":"2016","unstructured":"Kanda, N., Lu, X., Kawai, H.: Maximum a posteriori based decoding for CTC acoustic models. Interspeech 2016, 1868\u20131872 (2016)","journal-title":"Interspeech"},{"doi-asserted-by":"crossref","unstructured":"Kanda, N., Tachimori, M., Lu, X., Kawai, H.: Training data pseudo-shuffling and direct decoding framework for recurrent neural network based acoustic modeling. In: Automatic Speech Recognition and Understanding (ASRU), pp. 15\u201321 (2015)","key":"91_CR13","DOI":"10.1109\/ASRU.2015.7404768"},{"key":"91_CR14","doi-asserted-by":"crossref","first-page":"3419","DOI":"10.21437\/Interspeech.2016-677","volume":"2016","author":"Y Zhao","year":"2016","unstructured":"Zhao, Y., Xu, S., Xu, B.: Multidimensional residual learning based on recurrent neural networks for acoustic modeling. Interspeech 2016, 3419\u20133423 (2016)","journal-title":"Interspeech"},{"issue":"1","key":"91_CR15","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"GE Dahl","year":"2012","unstructured":"Dahl, G.E., Yu, D., Deng, L., Acero, A.: Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition. IEEE Trans. Audio Speech Lang. Process. 20(1), 30\u201342 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"2","key":"91_CR16","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner, L.R.: A tutorial on hidden Markov models and selected applications in speech recognition. Proc. IEEE 77(2), 257\u2013286 (1989)","journal-title":"Proc. IEEE"},{"doi-asserted-by":"crossref","unstructured":"Senior, A., Sak, H., Shafran, I.: Context dependent phone models for LSTM RNN acoustic modelling. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4585\u20134589. IEEE (2015)","key":"91_CR17","DOI":"10.1109\/ICASSP.2015.7178839"},{"doi-asserted-by":"crossref","unstructured":"Miao, Y., Gowayyed, M., Metze, F.: EESEN: end-to-end speech recognition using deep RNN models and WFST-based decoding. In: 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 167\u2013174. IEEE (2015)","key":"91_CR18","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"91_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/978-3-642-35289-8_3","volume-title":"Neural Networks: Tricks of the Trade","author":"YA LeCun","year":"2012","unstructured":"LeCun, Y.A., Bottou, L., Orr, G.B., M\u00fcller, K.-R.: Efficient BackProp. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. LNCS, vol. 7700, pp. 9\u201348. Springer, Heidelberg (2012). doi: 10.1007\/978-3-642-35289-8_3"},{"key":"91_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"724","DOI":"10.1007\/11939993_73","volume-title":"Chinese Spoken Language Processing","author":"Y Liu","year":"2006","unstructured":"Liu, Y., Fung, P., Yang, Y., Cieri, C., Huang, S., Graff, D.: HKUST\/MTS: a very large scale Mandarin telephone speech corpus. In: Huo, Q., Ma, B., Chng, E.-S., Li, H. (eds.) ISCSLP 2006. LNCS, vol. 4274, pp. 724\u2013735. Springer, Heidelberg (2006). doi: 10.1007\/11939993_73"},{"doi-asserted-by":"crossref","unstructured":"Seide, F., Li, G., Chen, X., Yu, D.: Feature engineering in context-dependent deep neural networks for conversational speech transcription. In: IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 24\u201329 (2011)","key":"91_CR21","DOI":"10.1109\/ASRU.2011.6163899"},{"doi-asserted-by":"crossref","unstructured":"Zhang, S., Zhang, C., You, Z., Zheng, R., Xu, B.: Asynchronous stochastic gradient descent for DNN training. In: ICASSP, pp. 6660\u20136663. IEEE (2013)","key":"91_CR22","DOI":"10.1109\/ICASSP.2013.6638950"},{"doi-asserted-by":"crossref","unstructured":"Bengio, Y., Lamblin, P., Popovici, D., Larochelle, H., et al.: Greedy layer-wise training of deep networks. In: Advances in Neural Information Processing Systems, vol. 19, p. 153 (2007)","key":"91_CR23","DOI":"10.7551\/mitpress\/7503.003.0024"},{"doi-asserted-by":"crossref","unstructured":"Li, J., Zhang, H., Cai, X., Xu, B.: Towards end-to-end speech recognition for Chinese Mandarin using long short-term memory recurrent neural networks. In: Sixteenth Annual Conference of the International Speech Communication Association (2015)","key":"91_CR24","DOI":"10.21437\/Interspeech.2015-717"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-70136-3_91","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,27]],"date-time":"2023-08-27T10:53:51Z","timestamp":1693133631000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-70136-3_91"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319701356","9783319701363"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-70136-3_91","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}