{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T10:16:14Z","timestamp":1773656174213,"version":"3.50.1"},"reference-count":19,"publisher":"Pleiades Publishing Ltd","issue":"12","license":[{"start":{"date-parts":[[2014,12,1]],"date-time":"2014-12-01T00:00:00Z","timestamp":1417392000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,12,1]],"date-time":"2014-12-01T00:00:00Z","timestamp":1417392000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Autom Remote Control"],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1134\/s000511791412008x","type":"journal-article","created":{"date-parts":[[2014,12,15]],"date-time":"2014-12-15T21:18:07Z","timestamp":1418678287000},"page":"2190-2200","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["An automatic multimodal speech recognition system with audio and video information"],"prefix":"10.1134","volume":"75","author":[{"given":"A. A.","family":"Karpov","sequence":"first","affiliation":[]}],"member":"137","published-online":{"date-parts":[[2014,12,17]]},"reference":[{"key":"188_CR1","first-page":"7","volume-title":"Tr. SPIIRAN","author":"IS Kipyatkova","year":"2010","unstructured":"Kipyatkova, I.S. and Karpov, A.A., An Analytical Survey of Large Vocabulary Russian Speech Recognition Systems, Tr. SPIIRAN, 2010, no. 12, pp. 7\u201320."},{"key":"188_CR2","first-page":"254","volume-title":"Proc. Int. Conf. Graphicon\u201903, Moscow","author":"S Soldatov","year":"2003","unstructured":"Soldatov, S., Lip Reading: Preparing Feature Vectors, in Proc. Int. Conf. Graphicon\u201903, Moscow, 2003, pp. 254\u2013256."},{"key":"188_CR3","first-page":"86","volume-title":"Komp\u2019yut. Mat.","author":"YuV Krak","year":"2009","unstructured":"Krak, Yu.V., Barmak, A.V., and Ternov, A.S., Information Technology Designed for Automatic Lip Reading for Ukrainian Language, Komp\u2019yut. Mat., 2009, no. 1, pp. 86\u201395."},{"key":"188_CR4","first-page":"2013","volume-title":"Proc. Int. Conf. ICASSP\u201902, Orlando, USA","author":"A Nefian","year":"2002","unstructured":"Nefian, A., Liang, L., Pi, X., et al., A Coupled HMM for Audio-Visual Speech Recognition, Proc. Int. Conf. ICASSP\u201902, Orlando, USA, 2002, pp. 2013\u20132016."},{"issue":"7","key":"188_CR5","first-page":"91","volume":"8","author":"AA Karpov","year":"2010","unstructured":"Karpov, A.A., Automatic Recognition of Audio-visual Russian Speech by Asynchronous Model, Inform.-Izm. Upravl. Sist., 2010, vol. 8, no. 7, pp. 91\u201396.","journal-title":"Inform.-Izm. Upravl. Sist."},{"key":"188_CR6","volume-title":"The HTK Book. HTK Version 3.4","author":"S Young","year":"2009","unstructured":"Young, S., Evermann, G., Gales, M., et al., The HTK Book. HTK Version 3.4, Cambridge: Cambridge Univ. Press, 2009."},{"key":"188_CR7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-49127-9","volume-title":"Springer Handbook of Speech Processing","author":"J Benesty","year":"2008","unstructured":"Benesty, J., Sondhi, M., Huang, Y., et al., Springer Handbook of Speech Processing, New York: Springer, 2008."},{"key":"188_CR8","volume-title":"Komp\u2019yut. Grafika Mul\u2019timedia","author":"A Vezhnevets","year":"2006","unstructured":"Vezhnevets, A. and Vezhnevets, V., Boosting\u2014Strengthening Simple Classifiers, Komp\u2019yut. Grafika Mul\u2019timedia, 2006, no. 4, no. 2 (http:\/\/cgm.computergraphics.ru\/content\/view\/112)."},{"issue":"3","key":"188_CR9","first-page":"481","volume":"22","author":"M Castrillyn","year":"2011","unstructured":"Castrillyn, M., Deniz, O., Hernandez, D., et al., A Comparison of Face and Facial Feature Detectors Based on the Viola-Jones General Object Detection Framework, Machine Vision Appl., 2011, vol. 22, no. 3, pp. 481\u2013494.","journal-title":"Machine Vision Appl."},{"key":"188_CR10","volume-title":"Learning OpenCV","author":"G Bradsky","year":"2008","unstructured":"Bradsky, G. and Kaehler, A., Learning OpenCV, Sebastopol, California: O\u2019Reilly, 2008."},{"key":"188_CR11","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1109\/ICME.2002.1035365","volume":"2","author":"L Liang","year":"2002","unstructured":"Liang, L., Liu, X., Zhao, Y., et al., Speaker Independent Audio-Visual Continuous Speech Recognition, Proc. Int. Conf. on Multimedia and Expo ICME\u201902, Lausanne, Switzerland, 2002, vol. 2, pp. 25\u201328.","journal-title":"Proc. Int. Conf. on Multimedia and Expo ICME\u201902, Lausanne, Switzerland"},{"issue":"4","key":"188_CR12","first-page":"845","volume":"163","author":"VI Levenshtein","year":"1965","unstructured":"Levenshtein, V.I., Binary Codes Capable of Correcting Deletions, Insertions, and Reversals, Dokl. Akad. Nauk USSR, 1965, vol. 163, no. 4, pp. 845\u2013848.","journal-title":"Dokl. Akad. Nauk USSR"},{"key":"188_CR13","first-page":"66","volume-title":"Probl. Upravlen.","author":"AA Saakyan","year":"2009","unstructured":"Saakyan, A.A., Investigation of Quality Measures for Speech Recognition Systems, Probl. Upravlen., 2009, no. 4, pp. 66\u201373."},{"key":"188_CR14","first-page":"409","volume-title":"Proc. 29th IEEE Int. Conf. on Acoustics, Speech, and Signal Processing ICASSP\u201904, Montreal, Canada","author":"M Bisani","year":"2004","unstructured":"Bisani, M. and Ney, H., Bootstrap Estimates for Confidence Intervals in ASR Performance Evaluation, Proc. 29th IEEE Int. Conf. on Acoustics, Speech, and Signal Processing ICASSP\u201904, Montreal, Canada, 2004, pp. 409\u2013412."},{"key":"188_CR15","first-page":"1260","volume-title":"EURASIP J. Appl. Signal Process.","author":"M Heckmann","year":"2002","unstructured":"Heckmann, M., Berthommier, F., and Kroschel, K., Noise Adaptive Stream Weighting in Audio-Visual Speech Recognition, EURASIP J. Appl. Signal Process., 2002, no. 1, pp. 1260\u20131273."},{"key":"188_CR16","first-page":"237","volume-title":"Proc. Int. Conf. on Multimodal Interfaces ICMI\u201908, Chania","author":"M Gurban","year":"2008","unstructured":"Gurban, M., Thiran, J.P., Drugman, T., et al., Dynamic Modality Weighting for Multi-Stream HMMs in Audio-Visual Speech Recognition, Proc. Int. Conf. on Multimodal Interfaces ICMI\u201908, Chania, 2008, pp. 237\u2013240."},{"issue":"7","key":"188_CR17","doi-asserted-by":"publisher","first-page":"1389","DOI":"10.1134\/S000511791107006X","volume":"72","author":"RM Yusupov","year":"2011","unstructured":"Yusupov, R.M., Ronzhin, A.L., Prishchepa, M.V., et al., Models and Hardware-Software Solutions for Automatic Control of Intelligent Hall, Autom. Remote Control, 2011, vol. 72, no. 7, pp. 1389\u20131397.","journal-title":"Autom. Remote Control"},{"issue":"4","key":"188_CR18","doi-asserted-by":"publisher","first-page":"434","DOI":"10.1134\/S0005117909030096","volume":"70","author":"RV Bilik","year":"2009","unstructured":"Bilik, R.V., Zhozhikashvili, V.A., Petukhova, N.V., et al., Analysis of the Oral Interface in the Interactive Servicing Systems. II, Autom. Remote Control, 2009, vol. 70, no. 4, pp. 434\u2013448.","journal-title":"Autom. Remote Control"},{"issue":"3","key":"188_CR19","doi-asserted-by":"publisher","first-page":"546","DOI":"10.1134\/S1054661809030225","volume":"19","author":"AA Karpov","year":"2009","unstructured":"Karpov, A.A. and Ronzhin, A.L., Information Enquiry Kiosk with Multimodal User Interface, Pattern Recogn. Image Anal., 2009, vol. 19, no. 3, pp. 546\u2013558.","journal-title":"Pattern Recogn. Image Anal."}],"container-title":["Automation and Remote Control"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1134\/S000511791412008X.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1134\/S000511791412008X","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1134\/S000511791412008X","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1134\/S000511791412008X.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T22:29:37Z","timestamp":1773613777000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1134\/S000511791412008X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12]]},"references-count":19,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2014,12]]}},"alternative-id":["188"],"URL":"https:\/\/doi.org\/10.1134\/s000511791412008x","relation":{},"ISSN":["0005-1179","1608-3032"],"issn-type":[{"value":"0005-1179","type":"print"},{"value":"1608-3032","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,12]]},"assertion":[{"value":"28 March 2012","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2014","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}