{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T15:10:06Z","timestamp":1751814606951,"version":"3.41.0"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319995786"},{"type":"electronic","value":"9783319995793"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-99579-3_26","type":"book-chapter","created":{"date-parts":[[2018,8,24]],"date-time":"2018-08-24T07:36:09Z","timestamp":1535096169000},"page":"245-254","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Designing Advanced Geometric Features for Automatic Russian Visual Speech Recognition"],"prefix":"10.1007","author":[{"given":"Denis","family":"Ivanko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dmitry","family":"Ryumin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandr","family":"Axyonov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Milo\u0161","family":"\u017delezn\u00fd","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,25]]},"reference":[{"key":"26_CR1","series-title":"Signals and Communication Technology","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5779-3","volume-title":"Automatic Speech Recognition","author":"D Yu","year":"2015","unstructured":"Yu, D., Deng, L.: Automatic Speech Recognition. SCT. Springer, London (2015). https:\/\/doi.org\/10.1007\/978-1-4471-5779-3"},{"key":"26_CR2","doi-asserted-by":"publisher","first-page":"746","DOI":"10.1038\/264746a0","volume":"264","author":"H McGurk","year":"1976","unstructured":"McGurk, H., MacDonald, J.: Hearing lips and seeing voices. Nature 264, 746\u2013748 (1976)","journal-title":"Nature"},{"key":"26_CR3","first-page":"23","volume":"22","author":"G Potamianos","year":"2004","unstructured":"Potamianos, G., Neti, C., Matthews, I.: Audio-visual automatic speech recognition: an overview. Issues Audio Vis. Speech Process. 22, 23 (2004)","journal-title":"Issues Audio Vis. Speech Process."},{"key":"26_CR4","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1016\/j.imavis.2014.06.004","volume":"32","author":"Z Zhou","year":"2014","unstructured":"Zhou, Z., Zhao, G., Hong, X., Pietikainen, M.: A review of recent advances in visual speech decoding. Image Vis. Comput. 32, 590\u2013605 (2014)","journal-title":"Image Vis. Comput."},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Bowden, R., et al.: Recent developments in automated lip-reading. In: Proceedings of SPIE, Optics and Photonics for Counterterrorism, Crime Fighting and Defence IX, vol. 8901, p. 13 (2013)","DOI":"10.1117\/12.2029464"},{"issue":"9","key":"26_CR6","doi-asserted-by":"publisher","first-page":"1635","DOI":"10.1109\/JPROC.2015.2459017","volume":"103","author":"K Katsaggelos","year":"2015","unstructured":"Katsaggelos, K., Bahaadini, S., Molina, R.: Audiovisual fusion: challenges and new approaches. Proc. IEEE 103(9), 1635\u20131653 (2015)","journal-title":"Proc. IEEE"},{"issue":"1\u20134","key":"26_CR7","first-page":"35","volume":"10","author":"TW Seong","year":"2018","unstructured":"Seong, T.W., Ibrahim, M.Z.: A review of audio-visual speech recognition. J. Telecommun. Electron. Comput. Eng. 10(1\u20134), 35\u201340 (2018)","journal-title":"J. Telecommun. Electron. Comput. Eng."},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Lee, B., et al.: AVICAR: audio-visual speech corpus in a car environment. In: Proceedings of Interspeech 2004, pp. 380\u2013383 (2004)","DOI":"10.21437\/Interspeech.2004-424"},{"key":"26_CR9","unstructured":"Cox, S., Harvey, R., Lan, Y., Newmann, J., Theobald, B.: The challenge of multispeaker lip-reading. In: Proceedings of the International Conference Auditory-Visual Speech Process (AVSP), pp. 179\u2013184 (2008)"},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Patterson, E., Gurbuz, E., Tufekci, Z., Gowdy, J.: CUAVE: a new audio-visual database for multimodal human-computer interface research. In: Proceedings of the IEEE ICASSP 2002, vol. 2, pp. 2017\u20132020 (2002)","DOI":"10.1109\/ICASSP.2002.5745028"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Hazen, T., Saenko, K., La, C., Glass, J.: A segment-base audio-visual speech recognizer: data collection, development, and initial experiments. In: Proceedings of the International Conference Multimodal Interfaces, pp. 235\u2013242 (2004)","DOI":"10.1145\/1027933.1027972"},{"key":"26_CR12","unstructured":"Lucey, P., Potaminanos, G., Sridharan, S.: Patch-based analysis of visual speech from multiple views. In: Proceedings of AVSP 2008, pp. 69\u201374 (2008)"},{"key":"26_CR13","doi-asserted-by":"crossref","unstructured":"Abhishek, N., Prasanta, K.G.: PRAV: a phonetically rich audio visual corpus. In: Proceedings of Interspeech 2017, pp. 3747\u20133751 (2017)","DOI":"10.21437\/Interspeech.2017-242"},{"key":"26_CR14","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1007\/978-3-319-43958-7_40","volume-title":"Speech and Computer","author":"V Verkhodanova","year":"2016","unstructured":"Verkhodanova, V., Ronzhin, A., Kipyatkova, I., Ivanko, D., Karpov, A., \u017delezn\u00fd, M.: HAVRUS corpus: high-speed recordings of audio-visual Russian speech. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS (LNAI), vol. 9811, pp. 338\u2013345. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-43958-7_40"},{"issue":"7","key":"26_CR15","doi-asserted-by":"publisher","first-page":"1936","DOI":"10.1109\/TASL.2012.2191956","volume":"20","author":"J Newman","year":"2012","unstructured":"Newman, J., Cox, S.: Language identification using visual features. Proc. IEEE Audio Speech Lang. Process. 20(7), 1936\u20131947 (2012)","journal-title":"Proc. IEEE Audio Speech Lang. Process."},{"key":"26_CR16","doi-asserted-by":"crossref","unstructured":"Lan, Y., Theobald, B., Harvey, R.: View independent computer lip-reading. In: Proceedings of the International Conference Multimedia Expo (ICME), pp. 432\u2013437 (2012)","DOI":"10.1109\/ICME.2012.192"},{"issue":"1","key":"26_CR17","first-page":"181","volume":"36","author":"Z Zhou","year":"2014","unstructured":"Zhou, Z., Hong, X., Zhao, G., Pietikainen, M.: A compact representation of visual speech data using latent variables. Proc. IEEE Trans. Pattern Anal. Mach. Intell. 36(1), 181\u2013187 (2014)","journal-title":"Proc. IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"7","key":"26_CR18","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1109\/TMM.2009.2030637","volume":"11","author":"G Zhao","year":"2009","unstructured":"Zhao, G., Barnard, M., Pietikainen, M.: Lipreading with local spatiotemporal descriptors. Proc. IEEE Trans. Multimed. 11(7), 1254\u20131265 (2009)","journal-title":"Proc. IEEE Trans. Multimed."},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"Estellers, V., Thiran, J.: Multi-pose lipreading and audio-visual speech recognition. EURALISP J. Adv. Signal Process. 51 (2012)","DOI":"10.1186\/PREACCEPT-2074613880613707"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Hong, X., Yao, H., Wan, Y., Chen, R.: A PCA Based visual DCT feature extraction method for lip-reading. In: Proceedings of the International Conference Intelligent Information Hiding Multimedia, Signal Process, pp. 321\u2013326 (2006)","DOI":"10.1109\/IIH-MSP.2006.265008"},{"issue":"10","key":"26_CR21","doi-asserted-by":"publisher","first-page":"2879","DOI":"10.1109\/TIP.2006.877528","volume":"15","author":"H Cetingul","year":"2006","unstructured":"Cetingul, H., Yemez, Y., Erzin, E., Tekalp, A.: Discriminative analysis of lip motion features for speaker identification and speech-reading. Proc. IEEE Trans. Image Process. 15(10), 2879\u20132891 (2006)","journal-title":"Proc. IEEE Trans. Image Process."},{"key":"26_CR22","unstructured":"Yoshinaga, T., Tamura, S., Iwano, K., Furui, S.: Audio-visual speech recognition using lip-movement extracted from side-face images. In: Proceedings of the International Conference Auditory-Visual Speech Processing (AVSP), pp. 117\u2013120 (2003)"},{"key":"26_CR23","unstructured":"Lan, Y., Theobald, B., Harvey, R., Ong, E., Bowden, R.: Improving visual features for lip-reading. In: Proceedings of the International Conference Auditory Visual Speech Processing (AVSP), pp. 142\u2013147 (2010)"},{"issue":"44","key":"26_CR24","first-page":"7","volume":"9","author":"N Radha","year":"2016","unstructured":"Radha, N., Shahina, A., Khan, A.: An improved visual speech recognition of isolated words using combined pixel and geometric features. Proc. J. Sci. Technol. 9(44), 7 (2016)","journal-title":"Proc. J. Sci. Technol."},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Rahmani, M.H., Alamsganj, F.: Lip-reading via a DNN-HMM hybrid system using combination of the image-based and model-based features. In: 3D International Conference on Pattern Recognition and Image Analysis, pp. 195\u2013199 (2017)","DOI":"10.1109\/PRIA.2017.7983045"},{"key":"26_CR26","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1007\/978-3-319-66429-3_76","volume-title":"Speech and Computer","author":"D Ivanko","year":"2017","unstructured":"Ivanko, D., et al.: Using a high-speed video camera for robust audio-visual speech recognition in acoustically noisy conditions. In: Karpov, A., Potapova, R., Mporas, I. (eds.) SPECOM 2017. LNCS (LNAI), vol. 10458, pp. 757\u2013766. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-66429-3_76"},{"key":"26_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-319-54184-6_6","volume-title":"Computer Vision \u2013 ACCV 2016","author":"JS Chung","year":"2017","unstructured":"Chung, J.S., Zisserman, A.: Lip reading in the wild. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10112, pp. 87\u2013103. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54184-6_6"},{"key":"26_CR28","unstructured":"Implementation of Computer Vision Library. https:\/\/github.com\/davisking\/dlib . Accessed 30 Apr 2018"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Baltrusaitis, T., Deravi, F., Morency, L.: 3D constrained local model for rigid and non-rigid facial tracking. In: Computer Vision and Pattern Recognition (CVPR), pp. 2610\u20132617 (2012)","DOI":"10.1109\/CVPR.2012.6247980"},{"key":"26_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.imavis.2016.03.003","volume":"51","author":"D Howell","year":"2016","unstructured":"Howell, D., Cox, S., Theobald, B.: Visual units and confusion modelling for automatic lip-reading. Image Vis. Comput. 51, 1\u201312 (2016)","journal-title":"Image Vis. Comput."},{"key":"26_CR31","unstructured":"Description of Euclidean Distance Calculation. https:\/\/en.wikipedia.org\/wiki\/Euclidean_distance . Accessed 30 Apr 2018"},{"key":"26_CR32","unstructured":"Machine Learning Toolkit. http:\/\/scikit-learn.org\/stable\/ . Accessed 30 Apr 2018"},{"key":"26_CR33","doi-asserted-by":"crossref","unstructured":"Ivanko, D., et al.: Multimodal speech recognition: increasing accuracy using high-speed video data. J. Multimodal User Interfaces (JMUI) (2018, in press)","DOI":"10.1007\/s12193-018-0267-1"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99579-3_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T14:41:12Z","timestamp":1751812872000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319995786","9783319995793"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99579-3_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}