{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T03:40:10Z","timestamp":1746416410679,"version":"3.40.4"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319115801"},{"type":"electronic","value":"9783319115818"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-11581-8_6","type":"book-chapter","created":{"date-parts":[[2014,10,10]],"date-time":"2014-10-10T14:29:54Z","timestamp":1412951394000},"page":"50-57","source":"Crossref","is-referenced-by-count":5,"title":["A Framework for Recording Audio-Visual Speech Corpora with a Microphone and a High-Speed Camera"],"prefix":"10.1007","author":[{"given":"Alexey","family":"Karpov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Irina","family":"Kipyatkova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Milo\u0161","family":"\u017delezn\u00fd","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"6_CR1","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/j.specom.2013.07.004","volume":"56","author":"A. Karpov","year":"2014","unstructured":"Karpov, A., Markov, K., Kipyatkova, I., Vazhenina, D., Ronzhin, A.: Large vocabulary Russian speech recognition using syntactico-statistical language modeling. Speech Communication\u00a056, 213\u2013228 (2014)","journal-title":"Speech Communication"},{"key":"6_CR2","unstructured":"Kipyatkova, I., Verkhodanova, V., Karpov, A.: Rescoring N-Best Lists for Russian Speech Recognition using Factored Language Models. In: Proc. 4th International Workshop on Spoken Language Technologies for Under-resourced Languages SLTU-2014, St. Petersburg, Russia, pp. 81\u201386 (2014)"},{"issue":"1","key":"6_CR3","first-page":"11","volume":"10","author":"I. Kipyatkova","year":"2013","unstructured":"Kipyatkova, I., Karpov, A., Verkhodanova, V., Zelezny, M.: Modeling of Pronunciation, Language and Nonverbal Units at Conversational Russian Speech Recognition. International Journal of Computer Science and Applications\u00a010(1), 11\u201330 (2013)","journal-title":"International Journal of Computer Science and Applications"},{"key":"6_CR4","series-title":"LNAI","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/978-3-319-01931-4_29","volume-title":"Speech and Computer","author":"I. Kipyatkova","year":"2013","unstructured":"Kipyatkova, I., Karpov, A.: Lexicon Size and Language Model Order Optimization for Russian LVCSR. In: \u017delezn\u00fd, M., Habernal, I., Ronzhin, A. (eds.) SPECOM 2013. LNCS (LNAI), vol.\u00a08113, pp. 219\u2013226. Springer, Heidelberg (2013)"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Potamianos, G., et al.: Audio-Visual Automatic Speech Recognition: An Overview. Chapter in Issues in Visual and Audio-Visual Speech Processing. MIT Press (2005)","DOI":"10.1016\/B0-08-044854-2\/00912-3"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Bailly, G., Perrier, P., Vatikiotis-Bateson, E.: Audiovisual Speech Processing. Cambridge University Press (2012)","DOI":"10.1017\/CBO9780511843891"},{"key":"6_CR7","unstructured":"Soldatov, S.: Lip reading: Preparing feature vectors. In: Proc. International Conference Graphicon 2003, Moscow, Russia, pp. 254\u2013256 (2003)"},{"key":"6_CR8","unstructured":"Gubochkin, I.: A system for tracking lip contour of a speaker. In: Modern Science: Actual problems of theory and practice. Natural and Technical Sciences, vol.\u00a0(4-5), pp. 20\u201326 (2012) (in Rus.)"},{"issue":"1","key":"6_CR9","doi-asserted-by":"publisher","first-page":"34","DOI":"10.3103\/S1060992X14010068","volume":"23","author":"A. Savchenko","year":"2014","unstructured":"Savchenko, A., Khokhlova, Y.: About neural-network algorithms application in viseme classification problem with face video in audiovisual speech recognition systems. Optical Memory and Neural Networks (Information Optics)\u00a023(1), 34\u201342 (2014)","journal-title":"Optical Memory and Neural Networks (Information Optics)"},{"key":"6_CR10","unstructured":"Krak, Y., Barmak, A., Ternov, A.: Information technology for automatic lip reading of Ukrainian speech. Computational Mathmatics. Kyiv\u00a01, 86\u201395 (2009) (in Rus.)"},{"key":"6_CR11","unstructured":"\u017delezn\u00fd, M., C\u00edsar, P., Krnoul, Z., Ronzhin, A., Li, I., Karpov, A.: Design of Russian audio-visual speech corpus for bimodal speech recognition. In: Proc. 10th International Conference on Speech and Computer SPECOM 2005, Patras, Greece, pp. 397\u2013400 (2005)"},{"key":"6_CR12","unstructured":"Cisar, P., Zelinka, J., Zelezny, M., Karpov, A., Ronzhin, A.: Audio-visual speech recognition for Slavonic languages (Czech and Russian). In: Proc. International Conference SPECOM 2006, St. Petersburg, Russia, pp. 493\u2013498 (2006)"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Karpov, A., Ronzhin, A., Markov, K., Zelezny, M.: Viseme-dependent weight optimization for CHMM-based audio-visual speech recognition. In: Proc. Interspeech 2010 International Conference, Makuhari, Japan, pp. 2678\u20132681 (2010)","DOI":"10.21437\/Interspeech.2010-710"},{"key":"6_CR14","unstructured":"Karpov, A., Ronzhin, A., Kipyatkova, I., Zelezny, M.: Influence of phone-viseme temporal correlations on audio-visual STT and TTS performance. In: Proc. 17th International Congress of Phonetic Sciences ICPhS 2011, Hong Kong, China, pp. 1030\u20131033 (2011)"},{"key":"6_CR15","unstructured":"Grishina, E.: Multimodal Russian corpus (MURCO): First steps. In: Proc. 7th Int. Conf. on Language Resources and Evaluation LREC 2010, Valetta, Malta, pp. 2953\u20132960 (2010)"},{"key":"6_CR16","unstructured":"Chitu, A.G., Rothkrantz, L.J.M.: The influence of video sampling rate on lipreading per-formance. In: Proc. SPECOM 2007, Moscow, Russia, pp. 678\u2013684 (2007)"},{"key":"6_CR17","series-title":"LNAI","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/978-3-642-15760-8_33","volume-title":"Text, Speech and Dialogue","author":"A.G. Chitu","year":"2010","unstructured":"Chitu, A.G., Driel, K., Rothkrantz, L.J.M.: Automatic lip reading in the Dutch language using active appearance models on high speed recordings. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2010. LNCS (LNAI), vol.\u00a06231, pp. 259\u2013266. Springer, Heidelberg (2010)"},{"key":"6_CR18","unstructured":"Chitu, A.G., Rothkrantz, L.J.M.: Dutch multimodal corpus for speech recognition. In: Proc. LREC 2008 Workshop on Multimodal Corpora, Marrakech, Morocco, pp. 56\u201359 (2008)"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Karpov, A., Ronzhin, A., Kipyatkova, I.: Designing a Multimodal Corpus of Audio-Visual Speech using a High-Speed Camera. In: Proc. 11th IEEE International Conference on Signal Processing ICSP 2012, pp. 519\u2013522. IEEE Press, Beijing (2012)","DOI":"10.1109\/ICoSP.2012.6491539"},{"key":"6_CR20","unstructured":"Young, S., et al.: The HTK Book, Version 3.4. Cambridge Univ. Press (2009)"},{"key":"6_CR21","unstructured":"Liang, L., Liu, X., Zhao, Y., Pi, X., Nefian, A.: Speaker independent audio-visual continuous speech recognition. In: Proc. Int. Conf. on Multimedia & Expo ICME 2002, Lausanne, Switzerland, pp. 25\u201328 (2002)"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Viola, P., Jones, M.: Rapid object detection using a boosted cascade of simple features. In: Proc. IEEE Int. Conf. on Computer Vision and Pattern Recognition CVPR 2001, USA, pp. 511\u2013518 (2001)","DOI":"10.1109\/CVPR.2001.990517"},{"issue":"3","key":"6_CR23","first-page":"481","volume":"22","author":"M. Castrillyn","year":"2011","unstructured":"Castrillyn, M., Deniz, O., Hernndez, D., Lorenzo, J.: A comparison of face and facial feature detectors based on the Viola-Jones general object detection framework. Machine Vision and Applications\u00a022(3), 481\u2013494 (2011)","journal-title":"Machine Vision and Applications"},{"key":"6_CR24","unstructured":"Feldhoffer, G., Bardi, T., Takacs, G., Tihanyi, A.: Temporal asymmetry in relations of acoustic and visual features of speech. In: Proc 15th European Signal Processing Conference EUSIPCO 2007, Poznan, Poland, pp. 2341\u20132345 (2007)"},{"key":"6_CR25","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1250\/ast.15.143","volume":"15","author":"K. Sekiyama","year":"1994","unstructured":"Sekiyama, K.: Differences in auditory-visual speech perception between Japanese and America: McGurk effect as a function of incompatibility. Journal of the Acoustical Society of Japan\u00a015, 143\u2013158 (1994)","journal-title":"Journal of the Acoustical Society of Japan"},{"key":"6_CR26","unstructured":"Chen, Y., Hazan, V.: Language effects on the degree of visual influence in audiovisual speech perception. In: Proc. 16th International Congress of Phonetic Sciences ICPhS 2007, Saarbr\u00fccken, Germany, pp. 2177\u20132180 (2007)"},{"key":"6_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1007\/978-3-319-07437-5_35","volume-title":"Universal Access in Human-Computer Interaction. Design and Development Methods for Universal Access","author":"A. Karpov","year":"2014","unstructured":"Karpov, A., Ronzhin, A.: A Universal Assistive Technology with Multimodal Input and Multimedia Output Interfaces. In: Stephanidis, C., Antona, M. (eds.) UAHCI 2014, Part I. LNCS, vol.\u00a08513, pp. 369\u2013378. Springer, Heidelberg (2014)"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-11581-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T03:12:07Z","timestamp":1746414727000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-11581-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319115801","9783319115818"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-11581-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]}}}