{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:51:44Z","timestamp":1740099104610,"version":"3.37.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319995786"},{"type":"electronic","value":"9783319995793"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-99579-3_31","type":"book-chapter","created":{"date-parts":[[2018,8,24]],"date-time":"2018-08-24T07:36:09Z","timestamp":1535096169000},"page":"291-300","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Improving Russian LVCSR Using Deep Neural Networks for Acoustic and Language Modeling"],"prefix":"10.1007","author":[{"given":"Irina","family":"Kipyatkova","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,25]]},"reference":[{"key":"31_CR1","series-title":"Signals and Communication Technology","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5779-3","volume-title":"Automatic Speech Recognition","author":"Dong Yu","year":"2015","unstructured":"Yu, Dong, Deng, Li: Automatic Speech Recognition. SCT. Springer, London (2015). https:\/\/doi.org\/10.1007\/978-1-4471-5779-3"},{"issue":"3","key":"31_CR2","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1109\/JAS.2017.7510508","volume":"4","author":"D Yu","year":"2017","unstructured":"Yu, D., Li, J.: Recent progresses in deep learning based acoustic models. IEEE\/CAA J. Automatica Sinica 4(3), 396\u2013409 (2017)","journal-title":"IEEE\/CAA J. Automatica Sinica"},{"doi-asserted-by":"crossref","unstructured":"Kipyatkova, I., Karpov, A.: Variants of deep artificial neural networks for speech recognition systems. In: SPIIRAS Proceedings, vol. 6, no. 49, pp. 80\u2013103 (2016). (in Rus.), http:\/\/dx.doi.org\/10.15622\/sp.49.5","key":"31_CR3","DOI":"10.15622\/sp.49.5"},{"doi-asserted-by":"crossref","unstructured":"Peddini, V., Povey, D., Khundanpur, S.: A time delay neural network architecture for efficient modeling of long temporal contexts. In: INTERSPEECH-2015, pp. 3214\u20133218 (2015)","key":"31_CR4","DOI":"10.21437\/Interspeech.2015-647"},{"doi-asserted-by":"crossref","unstructured":"Sun, M., et al: Compressed time delay neural network for small-footprint keyword spotting. In: INTERSPEECH -2017, pp. 3607\u20133611 (2017)","key":"31_CR5","DOI":"10.21437\/Interspeech.2017-480"},{"doi-asserted-by":"crossref","unstructured":"Geiger, J.T., Zhang, Z., Weninger, F., Schuller, B., Rigoll, G.: Robust speech recognition using long short-term memory recurrent neural networks for hybrid acoustic modelling. In: INTERSPEECH-2014, pp. 631\u2013635 (2014)","key":"31_CR6","DOI":"10.21437\/Interspeech.2014-151"},{"doi-asserted-by":"crossref","unstructured":"Zeyer, A., Doetsch, P., Voigtlaender, P., Schl\u00fcter, R., Ney, H.: A comprehensive study of deep bidirectional LSTM RNNs for acoustic modeling in speech recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP-2017), pp.\u00a02462\u20132466 (2017)","key":"31_CR7","DOI":"10.1109\/ICASSP.2017.7952599"},{"issue":"3","key":"31_CR8","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1109\/LSP.2017.2723507","volume":"25","author":"V Peddinti","year":"2018","unstructured":"Peddinti, V., Wang, Y., Povey, D., Khudanpur, S.: Low latency acoustic modeling using temporal convolution and LSTMs. IEEE Sig. Process. Lett. 25(3), 373\u2013377 (2018)","journal-title":"IEEE Sig. Process. Lett."},{"doi-asserted-by":"crossref","unstructured":"Wang, Y., Chen, X., Gales, M., Ragni, A., Wong, J.: Phonetic and graphemic systems for multi-genre broadcast transcription. Preprint arXiv:1802.00254 , https:\/\/arxiv.org\/pdf\/1802.06412.pdf (2018)","key":"31_CR9","DOI":"10.1109\/ICASSP.2018.8462353"},{"doi-asserted-by":"crossref","unstructured":"Mikolov, T., Karafiat, M., Burget, L., Cernocky, J., Khudanpur, S.: Recurrent neural network based language model. In: INTERSPEECH 2010, Makuhari, Chiba, Japan, pp. 1045\u20131048 (2010)","key":"31_CR10","DOI":"10.21437\/Interspeech.2010-343"},{"unstructured":"Su, C., Huang, H., Shi, S., Guo, Y., Wu, H.: A parallel recurrent neural network for language modeling with POS tags. In: Proceedings of the 31st Pacific Asia Conference on Language, Information and Computation (PACLIC), https:\/\/paclic31.national-u.edu.ph\/wp-content\/uploads\/2017\/11\/PACLIC_31_paper_125.pdf","key":"31_CR11"},{"key":"31_CR12","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/978-3-642-40585-3_14","volume-title":"Text, speech, and dialogue","author":"D Soutner","year":"2013","unstructured":"Soutner, D., M\u00fcller, L.: Application of LSTM Neural Networks in Language Modelling. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2013. LNCS (LNAI), vol. 8082, pp. 105\u2013112. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40585-3_14"},{"doi-asserted-by":"crossref","unstructured":"Chen, X., Ragni, A., Liu, X., Gales, M.J.: Investigating bidirectional recurrent neural network language models for speech recognition. In: INTERSPEECH-2017, pp. 269\u2013273 (2017)","key":"31_CR13","DOI":"10.21437\/Interspeech.2017-513"},{"doi-asserted-by":"crossref","unstructured":"Chen, X., Liu, X., Ragni, A., Wang, Y., Gales, M.: Future word contexts in neural network language models. In: Preprint arXiv:1708.05592 (2017)","key":"31_CR14","DOI":"10.1109\/ASRU.2017.8268922"},{"doi-asserted-by":"crossref","unstructured":"Xiong, W., Wu, L., Alleva, F., Droppo, J., Huang, X., Stolcke, A.: The microsoft 2017 conversational Speech recognition system. Preprint arXiv:1708.06073 , https:\/\/arxiv.org\/abs\/1708.06073 (2017)","key":"31_CR15","DOI":"10.1109\/ICASSP.2018.8461870"},{"doi-asserted-by":"crossref","unstructured":"Tomashenko, N., Khokhlov, Y.: Speaker adaptation of context dependent deep neural networks based on MAP-adaptation and GMM-derived feature processing. In: INTERSPEECH-2014, pp. 2997\u20133001 (2014)","key":"31_CR16","DOI":"10.21437\/Interspeech.2014-501"},{"key":"31_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-23132-7_29","volume-title":"Speech and Computer","author":"A Prudnikov","year":"2015","unstructured":"Prudnikov, A., Medennikov, I., Mendelev, V., Korenevsky, M., Khokhlov, Y.: Improving acoustic models for Russian spontaneous speech recognition. In: Ronzhin, A., Potapova, R., Fakotakis, N. (eds.) SPECOM 2015. LNCS (LNAI), vol. 9319, pp. 234\u2013242. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-23132-7_29"},{"key":"31_CR18","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1007\/978-3-319-01931-4_17","volume-title":"Speech and Computer","author":"D Vazhenina","year":"2013","unstructured":"Vazhenina, D., Markov, K.: Evaluation of advanced language modeling techniques for Russian LVCSR. In: \u017delezn\u00fd, M., Habernal, I., Ronzhin, A. (eds.) SPECOM 2013. LNCS (LNAI), vol. 8113, pp. 124\u2013131. Springer, Cham (2013). https:\/\/doi.org\/10.1007\/978-3-319-01931-4_17"},{"doi-asserted-by":"crossref","unstructured":"Kudinov, M.S.: On applicability of recurrent neural networks to language modelling for inflective languages. J. Siberian Federal Univ. Eng. Technol. 9(8), 1291\u20131301 (2016). (in Rus.)","key":"31_CR19","DOI":"10.17516\/1999-494X-2016-9-8-1291-1301"},{"unstructured":"Povey, D. et al.: The Kaldi speech recognition toolkit. In: IEEE Workshop on Automatic Speech Recognition and Understanding ASRU (2011)","key":"31_CR20"},{"doi-asserted-by":"crossref","unstructured":"Saon, G., Soltau, H., Nahamoo, D., Picheny, M.: Speaker adaptation of neural network acoustic models using i-Vectors. In: IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 55\u201359 (2013)","key":"31_CR21","DOI":"10.1109\/ASRU.2013.6707705"},{"unstructured":"Povey, D., Zhang, X., Khudanpur, S.: Parallel training of DNNs with natural gradient and parameter averaging. Preprint arXiv:1410.7455 , http:\/\/arxiv.org\/pdf\/1410.7455v8.pdf (2014)","key":"31_CR22"},{"doi-asserted-by":"crossref","unstructured":"Zhang, X., Trmal, J., Povey, D., Khudanpur, S.: Improving deep neural network acoustic models using generalized maxout networks. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 215\u2013219 (2014)","key":"31_CR23","DOI":"10.1109\/ICASSP.2014.6853589"},{"key":"31_CR24","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1007\/978-3-319-43958-7_29","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2016","unstructured":"Kipyatkova, I., Karpov, A.: DNN-based acoustic modeling for Russian speech recognition using Kaldi. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS (LNAI), vol. 9811, pp. 246\u2013253. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-43958-7_29"},{"key":"31_CR25","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"362","DOI":"10.1007\/978-3-319-66429-3_35","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2017","unstructured":"Kipyatkova, I.: Experimenting with Hybrid TDNN\/HMM acoustic models for Russian speech recognition. In: Karpov, A., Potapova, R., Mporas, I. (eds.) SPECOM 2017. LNCS (LNAI), vol. 10458, pp. 362\u2013369. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-66429-3_35"},{"issue":"8","key":"31_CR26","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"doi-asserted-by":"crossref","unstructured":"Geiger, J.T., et al.: Robust speech recognition using long short-term memory recurrent neural networks for hybrid acoustic modelling. In: INTERSPEECH-2014, pp. 631\u2013635 (2014)","key":"31_CR27","DOI":"10.21437\/Interspeech.2014-151"},{"issue":"1","key":"31_CR28","first-page":"11","volume":"10","author":"I Kipyatkova","year":"2013","unstructured":"Kipyatkova, I., Karpov, A., Verkhodanova, V., Zelezny, M.: Modeling of pronunciation, language and nonverbal units at conversational Russian speech recognition. Int. J. Comput. Sci. Appl. 10(1), 11\u201330 (2013)","journal-title":"Int. J. Comput. Sci. Appl."},{"unstructured":"Stolcke, A., Zheng, J., Wang, W., Abrash, V.: SRILM at sixteen: update and outlook. In: IEEE Automatic Speech Recognition and Understanding Workshop ASRU 2011 (2011)","key":"31_CR29"},{"doi-asserted-by":"crossref","unstructured":"Mikolov, T., Kombrink, S., Deoras, A., Burget, L., \u010cernock\u00fd, J.: RNNLM - Recurrent Neural Network Language Modeling Toolkit. In: ASRU 2011 Demo Session (2011)","key":"31_CR30","DOI":"10.1109\/ICASSP.2011.5947611"},{"doi-asserted-by":"crossref","unstructured":"Mikolov, T., Deoras, A., Povey, D., Burget, L., \u010cernock\u00fd, J.: Strategies for training large scale neural network language models. In: Proceedings of ASRU 2011, Hawaii, pp. 196\u2013201 (2011)","key":"31_CR31","DOI":"10.1109\/ASRU.2011.6163930"},{"key":"31_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"418","DOI":"10.1007\/978-3-319-40663-3_48","volume-title":"Advances in Neural Networks \u2013 ISNN 2016","author":"I Kipyatkova","year":"2016","unstructured":"Kipyatkova, I., Karpov, A.: Language models with RNNs for rescoring hypotheses of Russian ASR. In: Cheng, L., Liu, Q., Ronzhin, A. (eds.) ISNN 2016. LNCS, vol. 9719, pp. 418\u2013425. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-40663-3_48"},{"unstructured":"Jokisch, O., et al.: Multilingual speech data collection for the assessment of pronunciation and prosody in a language learning system. In: Proceedings of SPECOM\u2019 2009, pp. 515\u2013520 (2009)","key":"31_CR33"},{"unstructured":"Stepanova, S.B.: Phonetic features of Russian speech: realization and transcription. Ph.D. thesis (1988) (in Rus.)","key":"31_CR34"},{"unstructured":"State Standard P 50840\u201395. Speech transmission by communication paths. Evaluation methods of quality, intelligibility and recognizability. Moscow, Standartov Publ., 230\u00a0p. (1996) (in Rus.)","key":"31_CR35"},{"key":"31_CR36","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1007\/978-3-319-43958-7_40","volume-title":"Speech and Computer","author":"V Verkhodanova","year":"2016","unstructured":"Verkhodanova, V., Ronzhin, A., Kipyatkova, I., Ivanko, D., Karpov, A., \u017delezn\u00fd, M.: HAVRUS corpus: high-speed recordings of audio-visual Russian speech. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS (LNAI), vol. 9811, pp. 338\u2013345. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-43958-7_40"},{"key":"31_CR37","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/j.specom.2013.07.004","volume":"56","author":"A Karpov","year":"2014","unstructured":"Karpov, A., Markov, K., Kipyatkova, I., Vazhenina, D., Ronzhin, A.: Large vocabulary Russian speech recognition using syntactico-statistical language modeling. Speech Commun. 56, 213\u2013228 (2014)","journal-title":"Speech Commun."}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99579-3_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T04:58:32Z","timestamp":1661835512000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319995786","9783319995793"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99579-3_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}