{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T03:40:39Z","timestamp":1761709239838},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319439570"},{"type":"electronic","value":"9783319439587"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-43958-7_29","type":"book-chapter","created":{"date-parts":[[2016,8,12]],"date-time":"2016-08-12T02:11:41Z","timestamp":1470967901000},"page":"246-253","source":"Crossref","is-referenced-by-count":20,"title":["DNN-Based Acoustic Modeling for Russian Speech Recognition Using Kaldi"],"prefix":"10.1007","author":[{"given":"Irina","family":"Kipyatkova","sequence":"first","affiliation":[]},{"given":"Alexey","family":"Karpov","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,8,13]]},"reference":[{"key":"29_CR1","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-5779-3","volume-title":"Automatic Speech Recognition - A Deep Learning Approach","author":"D Yu","year":"2015","unstructured":"Yu, D., Deng, L.: Automatic Speech Recognition - A Deep Learning Approach. Springer, London (2015)"},{"key":"29_CR2","unstructured":"Povey, D. et al.: The Kaldi speech recognition toolkit. In: IEEE Workshop on Automatic Speech Recognition and Understanding ASRU (2011)"},{"key":"29_CR3","doi-asserted-by":"crossref","unstructured":"Vesel\u00fd, K. et al.: Sequence-discriminative training of deep neural networks. In: INTERSPEECH 2013, pp. 2345\u20132349 (2013)","DOI":"10.21437\/Interspeech.2013-548"},{"key":"29_CR4","unstructured":"Povey, D., Zhang, X., Khudanpur, S.: Parallel training of DNNs with natural gradient and parameter averaging. preprint arXiv:1410.7455 http:\/\/arxiv.org\/pdf\/1410.7455v8.pdf (2014)"},{"key":"29_CR5","first-page":"153","volume":"19","author":"Y Bengio","year":"2007","unstructured":"Bengio, Y., Lamblin, P., Popovici, D., Larochelle, H.: Greedy layer-wise training of deep networks. Adv. Neural Inf. Process. Syst. (NIPS) 19, 153\u2013160 (2007)","journal-title":"Adv. Neural Inf. Process. Syst. (NIPS)"},{"key":"29_CR6","doi-asserted-by":"crossref","unstructured":"Seide, F., Li, G., Yu, D.: Conversational speech transcription using context-dependent deep neural networks. In: INTERSPEECH-2011, pp. 437\u2013440 (2011)","DOI":"10.21437\/Interspeech.2011-169"},{"issue":"1","key":"29_CR7","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"G Dahl","year":"2012","unstructured":"Dahl, G., Yu, D., Deng, L., Acero, A.: Context-dependent pre-trained deep neural networks for large vocabulary speech recognition. IEEE Trans. Audio Speech Lang. Process. 20(1), 30\u201342 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"29_CR8","doi-asserted-by":"crossref","unstructured":"Seide, F., Li, G., Yu, D.: Conversational speech transcription using context-dependent deep neural networks. In: INTERSPEECH-2011, pp. 437\u2013440 (2011)","DOI":"10.21437\/Interspeech.2011-169"},{"key":"29_CR9","doi-asserted-by":"crossref","unstructured":"Ellis, D.P.W., Singh, R., Sivadas, S.: Tandem acoustic modeling in large-vocabulary recognition. In: International Conference on Acoustics, Speech and Signal Processing ICASSP 2001, pp. 517\u2013520 (2001)","DOI":"10.1109\/ICASSP.2001.940881"},{"key":"29_CR10","doi-asserted-by":"crossref","unstructured":"Grezl, F., Karafiat, M., Kontar, S., Cernocky, J.: Probabilistic and bottle-neck features for LVCSR of meetings. In: ICASSP 2007, pp. 757\u2013760 (2007)","DOI":"10.1109\/ICASSP.2007.367023"},{"key":"29_CR11","unstructured":"Maas, A.L. et al.: Building DNN Acoustic Models for Large Vocabulary Speech Recognition. preprint arXiv:1406.7806, http:\/\/arxiv.org\/pdf\/1406.7806.pdf (2015)"},{"key":"29_CR12","doi-asserted-by":"crossref","unstructured":"Cosi, P.: A KALDI-DNN-based ASR system for Italian. In: IEEE International Joint Conference on Neural Networks IJCNN 2015, pp. 1\u20135 (2015)","DOI":"10.1109\/IJCNN.2015.7280336"},{"key":"29_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"186","DOI":"10.1007\/978-3-319-23132-7_23","volume-title":"Speech and Computer","author":"B Popovi\u0107","year":"2015","unstructured":"Popovi\u0107, B., Ostrogonac, S., Pakoci, E., Jakovljevi\u0107, N., Deli\u0107, V.: Deep neural network based continuous speech recognition for Serbian using the Kaldi toolkit. In: Ronzhin, A., Potapova, R., Fakotakis, N. (eds.) SPECOM 2015. LNCS, vol. 9319, pp. 186\u2013192. Springer, Heidelberg (2015)"},{"key":"29_CR14","unstructured":"Miao, Y.: Kaldi+PDNN: building DNN-based ASR systems with Kaldi and PDNN. arXiv preprint arXiv:1401.6984, https:\/\/arxiv.org\/abs\/1401.6984 (2014)"},{"key":"29_CR15","first-page":"40","volume":"10","author":"MYu Zulkarneev","year":"2013","unstructured":"Zulkarneev, M.Yu., Penalov, S.A.: System of speech recognition for Russian language, using deep neural networks and finite state transducers. Neurocomput. Dev. Appl. 10, 40\u201346 (2013). (in Russia)","journal-title":"Neurocomput. Dev. Appl."},{"key":"29_CR16","doi-asserted-by":"crossref","unstructured":"Tomashenko, N., Khokhlov, Y.: Speaker adaptation of context dependent deep neural networks based on MAP-adaptation and GMM-derived feature processing. In: INTERSPEECH 2014, Singapore, pp. 2997\u20133001 (2014)","DOI":"10.21437\/Interspeech.2014-501"},{"key":"29_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"234","DOI":"10.1007\/978-3-319-23132-7_29","volume-title":"Speech and Computer","author":"A Prudnikov","year":"2015","unstructured":"Prudnikov, A., Medennikov, I., Mendelev, V., Korenevsky, M., Khokhlov, Y.: Improving acoustic models for Russian spontaneous speech recognition. In: Ronzhin, A., Potapova, R., Fakotakis, N. (eds.) SPECOM 2015. LNCS, vol. 9319, pp. 234\u2013242. Springer, Heidelberg (2015)"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Zhang, X. et al.: Improving deep neural network acoustic models using generalized maxout networks. In: ICASSP 2014, pp. 215\u2013219 (2014)","DOI":"10.1109\/ICASSP.2014.6853589"},{"key":"29_CR19","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1016\/j.specom.2013.07.004","volume":"56","author":"A Karpov","year":"2014","unstructured":"Karpov, A., Markov, K., Kipyatkova, I., Vazhenina, D., Ronzhin, A.: Large vocabulary Russian speech recognition using syntactico-statistical language modeling. Speech Commun. 56, 213\u2013228 (2014)","journal-title":"Speech Commun."},{"key":"29_CR20","unstructured":"State Standard P\u00a050840-95. Speech transmission by communication paths. Evaluation methods of quality, intelligibility and recognizability, Moscow. Standartov Publ. (1996) (in Russia)"},{"key":"29_CR21","unstructured":"Stepanova, S.B.: Phonetic features of Russian speech: realization and transcription. Ph.D. Thesis (1988) (in Russia)"},{"key":"29_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1007\/978-3-319-01931-4_29","volume-title":"Speech and Computer","author":"I Kipyatkova","year":"2013","unstructured":"Kipyatkova, I., Karpov, A.: Lexicon size and language model order optimization for Russian LVCSR. In: \u017delezn\u00fd, M., Habernal, I., Ronzhin, A. (eds.) SPECOM 2013. LNCS, vol. 8113, pp. 219\u2013226. Springer, Heidelberg (2013)"},{"key":"29_CR23","doi-asserted-by":"crossref","unstructured":"Kneser, R., Ney, H.: Improved backing-off for m-gram language modeling. In: ICASSP 1995, pp. 181\u2013184 (1995)","DOI":"10.1109\/ICASSP.1995.479394"},{"key":"29_CR24","unstructured":"Stolcke, A., Zheng, J., Wang, W., Abrash, V.: SRILM at sixteen: update and outlook. In: ASRU 2011, Waikoloa, Hawaii, USA (2011)"},{"key":"29_CR25","unstructured":"Kipyatkova, I., Karpov, A., Verkhodanova, V., Zelezny, M.: Analysis of long-distance word dependencies and pronunciation variability at conversational Russian speech recognition. In: Federated Conference on Computer Science and Information Systems, FedCSIS 2012, pp. 719\u2013725 (2012)"},{"key":"29_CR26","doi-asserted-by":"crossref","unstructured":"Karpov, A., Kipyatkova, I., Ronzhin, A.: Very large vocabulary ASR for spoken Russian with syntactic and morphemic analysis. In: INTERSPEECH 2011, Florence, Italy, pp. 3161\u20133164 (2011)","DOI":"10.21437\/Interspeech.2011-791"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-43958-7_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,19]],"date-time":"2023-08-19T16:58:07Z","timestamp":1692464287000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-43958-7_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319439570","9783319439587"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-43958-7_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}