{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T03:46:01Z","timestamp":1752551161272,"version":"3.37.3"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319664286"},{"type":"electronic","value":"9783319664293"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-66429-3_76","type":"book-chapter","created":{"date-parts":[[2017,8,12]],"date-time":"2017-08-12T02:02:55Z","timestamp":1502503375000},"page":"757-766","source":"Crossref","is-referenced-by-count":12,"title":["Using a High-Speed Video Camera for Robust Audio-Visual Speech Recognition in Acoustically Noisy Conditions"],"prefix":"10.1007","author":[{"given":"Denis","family":"Ivanko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexey","family":"Karpov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dmitry","family":"Ryumin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Irina","family":"Kipyatkova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anton","family":"Saveliev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Victor","family":"Budkov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dmitriy","family":"Ivanko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Milo\u0161","family":"\u017delezn\u00fd","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,8,13]]},"reference":[{"issue":"9","key":"76_CR1","doi-asserted-by":"crossref","first-page":"1635","DOI":"10.1109\/JPROC.2015.2459017","volume":"103","author":"K Katsaggelos","year":"2015","unstructured":"Katsaggelos, K., Bahaadini, S., Molina, R.: Audiovisual fusion: challenges and new approaches. Proc. IEEE 103(9), 1635\u20131653 (2015)","journal-title":"Proc. IEEE"},{"key":"76_CR2","first-page":"223","volume":"198","author":"A Corradini","year":"2005","unstructured":"Corradini, A., Mehta, M., Bernsen, N.O., Martin, J., Abrilian, S.: Multimodal input fusion in human-computer interaction. Nato Sci. Ser. Comput. Syst. Sci. 198, 223 (2005)","journal-title":"Nato Sci. Ser. Comput. Syst. Sci."},{"key":"76_CR3","unstructured":"Lahat, D., Adall, T., Jutten, C.: Challenges in multimodal data fusion. In: Proceedings of the European Signal Processing Conference, pp. 101\u2013105 (2014)"},{"issue":"4","key":"76_CR4","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1016\/j.specom.2007.11.002","volume":"50","author":"X Shao","year":"2008","unstructured":"Shao, X., Barker, J.: Stream weight estimation for multistream audio-visual speech recognition in a multispeaker environment. Speech Commun. 50(4), 337\u2013353 (2008)","journal-title":"Speech Commun."},{"key":"76_CR5","unstructured":"Chitu, A.G., Rothkrantz, L.J.M.: The influence of video sampling rate on lipreading performance. In: Proceedings of the International Conference on Speech and Computer, SPECOM 2007, Moscow, Russia, pp. 678\u2013684 (2007)"},{"key":"76_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/978-3-642-15760-8_33","volume-title":"Text, Speech and Dialogue","author":"AG Chitu","year":"2010","unstructured":"Chitu, A.G., Driel, K., Rothkrantz, L.J.M.: Automatic lip reading in the Dutch language using active appearance models on high speed recordings. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2010. LNCS(LNAI), vol. 6231, pp. 259\u2013266. Springer, Heidelberg (2010). doi: 10.1007\/978-3-642-15760-8_33"},{"key":"76_CR7","doi-asserted-by":"crossref","unstructured":"Polykovsky, S., Kameda, Y., Ohta, Y.: Facial micro-expressions recognition using high speed camera and 3D-gradient descriptor. In: Proceedings of the 3rd International Conference on Crime Detection and Prevention (ICDP), Tsukuba, Japan, pp. 1\u20136 (2009)","DOI":"10.1049\/ic.2009.0244"},{"key":"76_CR8","unstructured":"Bettadapura, V.: Face expression recognition and analysis: the state of the art. Technical report, pp. 1\u201327. College of Computing, Georgia Institute of Technology, USA (2012)"},{"key":"76_CR9","doi-asserted-by":"crossref","unstructured":"Ohzeki, K.: Video analysis for detecting eye blinking using a high-speed camera. In: Proceedings of the 40th Asilomar Conference on Signals, Systems and Computers (ACSSC), Part 1, Pacific Grove, USA, pp. 1081\u20131085 (2006)","DOI":"10.1109\/ACSSC.2006.354919"},{"key":"76_CR10","unstructured":"Chitu, A.G., Rothkrantz, L.J.M.: On dual view lipreading using high speed camera. In: Proceedings of the 14th Annual Scientific Conference Euromedia, Ghent, Belgium, pp. 43\u201351 (2008)"},{"key":"76_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1007\/978-3-319-43958-7_40","volume-title":"Speech and Computer","author":"V Verkhodanova","year":"2016","unstructured":"Verkhodanova, V., Ronzhin, A., Kipyatkova, I., Ivanko, D., Karpov, A., \u017delezn\u00fd, M.: HAVRUS corpus: high-speed recordings of audio-visual Russian speech. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS, vol. 9811, pp. 338\u2013345. Springer, Cham (2016). doi: 10.1007\/978-3-319-43958-7_40"},{"key":"76_CR12","doi-asserted-by":"crossref","unstructured":"Karpov, A., Ronzhin, A., Markov, K., \u017delezn\u00fd, M.: Viseme-dependent weight optimization for CHMM-based audio-visual speech recognition. In: Proceedings of the Interspeech 2010, pp. 2678\u20132681 (2010)","DOI":"10.21437\/Interspeech.2010-710"},{"issue":"12","key":"76_CR13","doi-asserted-by":"crossref","first-page":"2190","DOI":"10.1134\/S000511791412008X","volume":"75","author":"A Karpov","year":"2014","unstructured":"Karpov, A.: An automatic multimodal speech recognition system with audio and video information. Autom. Remote Control 75(12), 2190\u20132200 (2014)","journal-title":"Autom. Remote Control"},{"key":"76_CR14","unstructured":"Zelezny, M., Csar, P.: Czech audio-visual speech corpus of a car driver for in-vehicle audio-visual speech recognition. In: Proceedings of the International Conference on Audio-Visual Speech Processing (AVSP 2003), pp. 169\u2013173 (2003)"},{"key":"76_CR15","unstructured":"Csar, P., Zelezny, M., Krnoul, Z., Kanis, J., Zelinka, J., Muller, L.: Design and recording of Czech speech corpus for audio-visual continuous speech recognition. In: Proceedings of the International Conference on the Auditory-Visual Speech Processing, pp. 1\u20134 (2005)"},{"key":"76_CR16","unstructured":"Grishina E.: Multimodal Russian corpus (MURCO): first steps. In: Proceedings of the 7th Language Resources and Evaluation Conference (LREC 2010), pp. 2953\u20132960 (2010)"},{"key":"76_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1007\/978-3-319-11581-8_6","volume-title":"Speech and Computer","author":"A Karpov","year":"2014","unstructured":"Karpov, A., Kipyatkova, I., \u017delezn\u00fd, M.: A framework for recording audio-visual speech corpora with a microphone and a high-speed camera. In: Ronzhin, A., Potapova, R., Delic, V. (eds.) SPECOM 2014. LNCS, vol. 8773, pp. 50\u201357. Springer, Cham (2014). doi: 10.1007\/978-3-319-11581-8_6"},{"key":"76_CR18","unstructured":"Chu, S.M., Huang, T.S.: Multi-Modal sensory fusion with application to audio-visual speech recognition. In: Proceedings of the Multi-Modal Speech Recognition Workshop 2002, Greensboro, USA (2002)"},{"issue":"2","key":"76_CR19","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1109\/TCYB.2013.2250954","volume":"44","author":"D Stewart","year":"2014","unstructured":"Stewart, D., Seymour, R., Pass, A., Ming, J.: Robust audio-visual speech recognition under noisy audio-video conditions. IEEE Trans. Cybern. 44(2), 175\u2013184 (2014)","journal-title":"IEEE Trans. Cybern."},{"key":"76_CR20","doi-asserted-by":"crossref","unstructured":"Huang, J., Kingsbury, B.: Audio-visual deep learning for noise robust speech recognition. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 7596\u20137599 (2013)","DOI":"10.1109\/ICASSP.2013.6639140"},{"issue":"4","key":"76_CR21","doi-asserted-by":"crossref","first-page":"1145","DOI":"10.1109\/TASL.2011.2172427","volume":"20","author":"V Estellers","year":"2012","unstructured":"Estellers, V., Gurban, M., Thiran, J.: On dynamic stream weighting for audio-visual speech recognition. IEEE Trans. Audio Speech Lang. Process. 20(4), 1145\u20131157 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-66429-3_76","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T03:38:30Z","timestamp":1659325110000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-66429-3_76"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319664286","9783319664293"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-66429-3_76","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}