{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T08:10:26Z","timestamp":1688458226056},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2013,9,3]],"date-time":"2013-09-03T00:00:00Z","timestamp":1378166400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Pattern Anal Applic"],"published-print":{"date-parts":[[2014,8]]},"DOI":"10.1007\/s10044-013-0350-x","type":"journal-article","created":{"date-parts":[[2013,9,2]],"date-time":"2013-09-02T04:57:30Z","timestamp":1378097850000},"page":"611-621","source":"Crossref","is-referenced-by-count":4,"title":["Visual-speech-pass filtering for robust automatic lip-reading"],"prefix":"10.1007","volume":"17","author":[{"given":"Jong-Seok","family":"Lee","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,9,3]]},"reference":[{"issue":"1","key":"350_CR1","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1109\/TCSVT.2004.837017","volume":"15","author":"A. Amer","year":"2005","unstructured":"Amer A, Dubois E (2005) Fast and reliable structure-oriented video noise estimation. IEEE Trans Circuits Syst Video Technol 15(1):113\u2013118","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"350_CR2","unstructured":"Arsic I, Thiran JP (2006) Mutual information eigenlips for audio-visual speech recognition. In: Proceedings of European Signal Processing Conference Florence, Italy"},{"key":"350_CR3","doi-asserted-by":"crossref","unstructured":"Bregler C, Konig Y (1994) Eigenlips for robust speech recognition. In: Proceedings of International Conference Acoustics, Speech and Signal Processing, Adelaide, Australia, vol. 2, pp 669\u2013672","DOI":"10.1109\/ICASSP.1994.389567"},{"issue":"1","key":"350_CR4","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1109\/6046.985551","volume":"4","author":"CC Chibelushi","year":"2002","unstructured":"Chibelushi CC, Deravi F, Mason JSD (2002) A review of speech-based bimodal recognition. IEEE Trans Multimed 4(1):23\u201337","journal-title":"IEEE Trans Multimed"},{"issue":"8","key":"350_CR5","doi-asserted-by":"crossref","first-page":"1192","DOI":"10.1109\/83.605417","volume":"6","author":"GI Chiou","year":"1997","unstructured":"Chiou GI, Hwang JN (1997) Lipreading from color video. IEEE Trans Image Process 6(8):1192\u20131195","journal-title":"IEEE Trans Image Process"},{"issue":"3","key":"350_CR6","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1109\/6046.865479","volume":"2","author":"S Dupont","year":"2000","unstructured":"Dupont S, Luettin J (2000) Audio-visual speech modeling for continuous speech recognition. IEEE Trans Multimed 2(3):141\u2013151","journal-title":"IEEE Trans Multimed"},{"key":"350_CR7","doi-asserted-by":"crossref","unstructured":"Fox NA, O\u2019Mullane BA, Reilly RB (2005) VALID: a new practical audio-visual database, and comparative results. In: Proceedings of International Conference Audio- and Video-Based Biometric Person Authentication, New York, USA, pp 777\u2013786","DOI":"10.1007\/11527923_81"},{"issue":"4","key":"350_CR8","doi-asserted-by":"crossref","first-page":"611","DOI":"10.1109\/49.81956","volume":"9","author":"HW Frowein","year":"1991","unstructured":"Frowein HW, Smoorenburg GF, Pyters L, Schinkel D (1991) Improved speech recognition through videotelephony: experiments with the hard of hearing. IEEE J Sel Areas Commun 9(4):611\u2013616","journal-title":"IEEE J Sel Areas Commun"},{"key":"350_CR9","unstructured":"Gurbuz S, Tufekci Z, Patterson E, Gowdy J (2001) Application of affine-invariant Fourier descriptors to lipreading for audio-visual speech recognition. In: Proceedings of International Conference on Acoustics, Speech and Signal Processing, Salt Lake City, UT, USA. vol 1, pp 177\u2013180"},{"key":"350_CR10","unstructured":"Hennecke ME, Prasad KV, Stork DG (1995) Automatic speech recognition system using acoustic and visual signals. In: Proceedings of 29th Asilomar Conference on Signals, Systems and Computers, Pacific Grove, CA, USA, vol 2, pp 1214\u20131218"},{"issue":"4","key":"350_CR11","doi-asserted-by":"crossref","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hermansky","year":"1994","unstructured":"Hermansky H, Morgan N (1994) RASTA processing of speech. IEEE Trans Speech Audio Process 2(4):578\u2013589","journal-title":"IEEE Trans Speech Audio Process"},{"key":"350_CR12","unstructured":"Huang X, Acero A, Hon HW (2001) Spoken language processing: a guide to theory, algorithm, and system development. Prentice-Hall, Upper Saddle River"},{"issue":"4","key":"350_CR13","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1109\/89.848222","volume":"8","author":"HY Jung","year":"2000","unstructured":"Jung HY, Lee SY (2000) On the temporal decorrelation of feature parameters for noise-robust speech recognition. IEEE Trans Speech Audio Process 8(4):407\u2013416","journal-title":"IEEE Trans Speech Audio Process"},{"issue":"1\u20132","key":"350_CR14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.specom.2004.01.003","volume":"43","author":"MN Kaynak","year":"2004","unstructured":"Kaynak MN, Zhi Q, Cheok AD, Sengupta K, Jian Z, Chung KC (2004) Lip geometric features for human-computer interaction using bimodal speech recognition: comparison and analysis. Speech Commun 43(1\u20132):1\u201316","journal-title":"Speech Commun"},{"key":"350_CR15","unstructured":"Lan, Y, Harvey, R, Theobald, BJ, Ong, EJ., Bowden, R (2009) Comparing visual features for lipreading. In: Proceedings of International Conference on Audio-Visual Speech Processing, Norwich, UK, pp 102\u2013106"},{"key":"350_CR16","unstructured":"Lan, Y, Theobald, BJ, Harvey, R, Ong, EJ, Bowden, R (2010) Improving visual features for lip-reading. In: Proceedings of International Conference on Audio-Visual Speech Processing, Kanagawa, Japan, pp 142\u2013147"},{"issue":"5","key":"350_CR17","doi-asserted-by":"crossref","first-page":"767","DOI":"10.1109\/TMM.2008.922789","volume":"10","author":"JS Lee","year":"2008","unstructured":"Lee JS, Park CH (2008) Robust audio-visual speech recognition based on late integration. IEEE Trans Multimed 10(5):767\u2013779","journal-title":"IEEE Trans Multimed"},{"issue":"4","key":"350_CR18","doi-asserted-by":"crossref","first-page":"1188","DOI":"10.1109\/TSMCB.2009.2036753","volume":"40","author":"JS Lee","year":"2010","unstructured":"Lee JS, Park CH (2010) Hybrid simulated annealing and its application to optimization of hidden markov models for visual speech recognition. IEEE Trans Syst Man Cybern B 40(4):1188\u20131196","journal-title":"IEEE Trans Syst Man Cybern B"},{"key":"350_CR19","doi-asserted-by":"crossref","unstructured":"Lucey S (2003) An evaluation of visual speech features for the tasks of speech and speaker recognition. In: Proceedings of International Conference on Audio- and Video-Based Biometric Person Authentication, Guildford, UK, pp 260\u2013267 (2003)","DOI":"10.1007\/3-540-44887-X_31"},{"issue":"2","key":"350_CR20","doi-asserted-by":"crossref","first-page":"198","DOI":"10.1109\/34.982900","volume":"24","author":"I Matthews","year":"2002","unstructured":"Matthews I, Cootes TF, Bangham JA, Cox S, Harvey R (2002) Extraction of visual features for lipreading. IEEE Trans Pattern Anal Mach Intell 24(2):198\u2013213","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"350_CR21","doi-asserted-by":"crossref","unstructured":"Matthews I, Potamianos G, Neti C, Luettin J (2001) A comparison of model and transform-based visual features for audio-visual LVCSR. In: Proceedings of International Conference on Multimedia and Expo, Tokyo, Japan, pp 22\u201325","DOI":"10.1109\/ICME.2001.1237849"},{"key":"350_CR22","unstructured":"Munhall K, Vatikiotis-Bateson E (1998) The moving face during speech communication. In: Campbell R, Dodd B, Burnham, D (eds) Hearing by eye II: advances in the psychology of speechreading and audio-visual speech. Psychology Press, Hove, pp 123\u2013142"},{"key":"350_CR23","unstructured":"Ngiam J, Khosla A, Kim M, Nam J, Lee H, Ng AY (2011) Multimodal deep learning. In: Proceedings of International Conference on Machine Learning, Bellevue, WA, USA (2011)"},{"key":"350_CR24","doi-asserted-by":"crossref","unstructured":"Ohala JJ (1975) The temporal regulation of speech. In: Fant G, Tatham MA (eds) Auditory analysis and perception. Academic Press, London, pp 431\u2013453","DOI":"10.1016\/B978-0-12-248550-3.50032-5"},{"key":"350_CR25","unstructured":"Oppenheim AV, Schafer RW (1999) Discrete-time signal processing, 2nd edn. Prentice-Hall, Upper Saddle River (1999)"},{"key":"350_CR26","doi-asserted-by":"crossref","first-page":"2965","DOI":"10.1016\/j.patcog.2008.05.008","volume":"41","author":"D O\u2019Shaughnessy","year":"2008","unstructured":"O\u2019Shaughnessy D (2008) Automatic speech recognition: history, methods and challenges. Pattern Recognit 41:2965\u20132979","journal-title":"Pattern Recognit"},{"key":"350_CR27","unstructured":"Petajan ED (1985) Automatic lipreading to enhance speech recognition. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, San Francisco, CA, USA, pp. 40\u201347"},{"key":"350_CR28","doi-asserted-by":"crossref","unstructured":"Potamianos G, Graf HP (1998) Linear discriminant analysis for speechreading. In: Proceedings of IEEE Workshop on Multimedia Processing, Redeondo Beach, CA, USA, pp 221\u2013226","DOI":"10.1109\/MMSP.1998.738938"},{"key":"350_CR29","doi-asserted-by":"crossref","unstructured":"Potamianos G, Graf HP, Cosatto E (1998) An image transform approach for HMM based automatic lipreading. In: Proceedings of International Conference on Image Processing, Chicago, IL, USA, vol 3, pp 173\u2013177","DOI":"10.1109\/ICIP.1998.999008"},{"key":"350_CR30","doi-asserted-by":"crossref","unstructured":"Potamianos G, Neti C (2003) Audio-visual speech recognition in challenging environments. In: Proceedings of Eurospeech, Geneva, Switzerland, pp 1293\u20131296","DOI":"10.21437\/Eurospeech.2003-410"},{"issue":"9","key":"350_CR31","doi-asserted-by":"crossref","first-page":"1306","DOI":"10.1109\/JPROC.2003.817150","volume":"91","author":"G Potamianos","year":"2003","unstructured":"Potamianos G, Neti C, Gravier G, Garg A, Senior AW (2003) Recent advances in the automatic recognition of audiovisual speech. Proc IEEE 91(9):1306\u20131326","journal-title":"Proc IEEE"},{"key":"350_CR32","doi-asserted-by":"crossref","unstructured":"Rabi, G, Lu, SW (1997) Energy minimization for extracting mouth curves in a facial image. In: Proceedings of International Conference on Intelligent Information Systems, Bahamas, pp 381\u2013385","DOI":"10.1109\/IIS.1997.645307"},{"key":"350_CR33","doi-asserted-by":"crossref","unstructured":"Saenko K, Darrell T, Glass J (2004) Articulatory features for robust visual speech recognition. In: Proceedings of International Conference on Multimodal Interfaces, State College, PA, USA, pp 152\u2013158","DOI":"10.1145\/1027933.1027960"},{"key":"350_CR34","doi-asserted-by":"crossref","first-page":"1700","DOI":"10.1109\/TPAMI.2008.303","volume":"31","author":"K Saenko","year":"2009","unstructured":"Saenko K, Livescu K, Glass J, Darrell T (2009) Multistream articulatory feature-based models for visual speech recognition IEEE Trans Pattern Anal Mach Intell 31:1700\u20131707","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"350_CR35","doi-asserted-by":"crossref","unstructured":"Seymour R, Stewart D, Ming J (2008) Comparison of image transform-based features for visual speech recognition in clean and corrupted videos. EURASIP J Image Video Process","DOI":"10.1155\/2008\/810362"},{"issue":"5","key":"350_CR36","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1109\/89.536928","volume":"4","author":"PL Silsbee","year":"1996","unstructured":"Silsbee PL, Bovik AC (1996) Computer lipreading for improved accuracy in automatic speech recognition. IEEE Trans Speech Audio Process 4(5):337\u2013351","journal-title":"IEEE Trans Speech Audio Process"},{"key":"350_CR37","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1002\/(SICI)1099-0720(199604)10:2<121::AID-ACP371>3.0.CO;2-V","volume":"10","author":"M Vitkovitch","year":"1996","unstructured":"Vitkovitch M, Barber P (1996) Visible speech as a function of image quality: effects of display parameters on lipreading ability. Appl Cogn Psychol 10:121\u2013140","journal-title":"Applied Cognitive Psychology"},{"issue":"7","key":"350_CR38","doi-asserted-by":"crossref","first-page":"1254","DOI":"10.1109\/TMM.2009.2030637","volume":"11","author":"G Zhao","year":"2009","unstructured":"Zhao G, Barnard M, Pietik\u00e4inen M (2009) Lipreading with local spatiotemporal descriptors. IEEE Trans Multimed 11(7):1254\u20131265","journal-title":"IEEE Trans Multimed"}],"container-title":["Pattern Analysis and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-013-0350-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10044-013-0350-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-013-0350-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T07:36:01Z","timestamp":1688456161000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10044-013-0350-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,9,3]]},"references-count":38,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2014,8]]}},"alternative-id":["350"],"URL":"https:\/\/doi.org\/10.1007\/s10044-013-0350-x","relation":{},"ISSN":["1433-7541","1433-755X"],"issn-type":[{"value":"1433-7541","type":"print"},{"value":"1433-755X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,9,3]]}}}