{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:35:41Z","timestamp":1750307741638,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2007,11,12]],"date-time":"2007-11-12T00:00:00Z","timestamp":1194825600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2007,11,12]]},"DOI":"10.1145\/1322192.1322231","type":"proceedings-article","created":{"date-parts":[[2007,11,30]],"date-time":"2007-11-30T14:39:06Z","timestamp":1196433546000},"page":"220-227","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Temporal filtering of visual speech for audio-visual speech recognition in acoustically and visually challenging environments"],"prefix":"10.1145","author":[{"given":"Jong-Seok","family":"Lee","sequence":"first","affiliation":[{"name":"KAIST, Daejeon, South Korea"}]},{"given":"Cheol Hoon","family":"Park","sequence":"additional","affiliation":[{"name":"KAIST, Daejeon, South Korea"}]}],"member":"320","published-online":{"date-parts":[[2007,11,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/6046.865479"},{"key":"e_1_3_2_1_2_1","volume-title":"Audio-visual speech recognition using an infrared headset. Speech Communication, 44(1--4):83--96","author":"Huang J.","year":"2004","unstructured":"Huang , J. , Potamianos , G. , Connell , J. , Neti , C. Audio-visual speech recognition using an infrared headset. Speech Communication, 44(1--4):83--96 , 2004 . Huang, J., Potamianos, G., Connell, J., Neti, C. Audio-visual speech recognition using an infrared headset. Speech Communication, 44(1--4):83--96, 2004."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.857572"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1093\/cercor\/bhl024"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1348\/000712601162220"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1038\/264746a0"},{"key":"e_1_3_2_1_7_1","first-page":"51","article-title":"Some preliminaries to a comprehensive account of audio-visual speech perception. In Hearing by Eye: The Psychology of Lip-reading, Dodd, B., Campbell, R. (eds.), Lawrence Erlbarum, London","volume":"3","author":"Summerfield A. Q","year":"1987","unstructured":"Summerfield , A. Q . Some preliminaries to a comprehensive account of audio-visual speech perception. In Hearing by Eye: The Psychology of Lip-reading, Dodd, B., Campbell, R. (eds.), Lawrence Erlbarum, London , UK , 1987 , 3 -- 51 . Summerfield, A. Q. Some preliminaries to a comprehensive account of audio-visual speech perception. In Hearing by Eye: The Psychology of Lip-reading, Dodd, B., Campbell, R. (eds.), Lawrence Erlbarum, London, UK, 1987, 3--51.","journal-title":"UK"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/6046.985551"},{"key":"e_1_3_2_1_9_1","volume-title":"Digital Image Processing","author":"Gonzalez R. C.","year":"2002","unstructured":"Gonzalez , R. C. , Woods , R. E. Digital Image Processing . Prentice-Hall , Upper Saddle River, NJ, 2002 . Gonzalez, R. C., Woods, R. E. Digital Image Processing. Prentice-Hall, Upper Saddle River, NJ, 2002."},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. European Conf. Speech Communication and Technology","author":"Potamianos G.","year":"2003","unstructured":"Potamianos , G. , Neti , C. Audio-visual speech recognition in challenging environments . In Proc. European Conf. Speech Communication and Technology ( Geneva, Switzerland , 2003 ), 1293--1296. Potamianos, G., Neti, C. Audio-visual speech recognition in challenging environments. In Proc. European Conf. Speech Communication and Technology (Geneva, Switzerland, 2003), 1293--1296."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1027933.1027960"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.2006.384382"},{"key":"e_1_3_2_1_13_1","volume-title":"Spoken Language Processing: A Guide to Theory, Algorithm, and System Development","author":"Huang X.-D.","year":"2001","unstructured":"Huang , X.-D. , Acero , A. , Hon , H.-W. Spoken Language Processing: A Guide to Theory, Algorithm, and System Development . Prentice-Hall , Upper Saddle River, NJ, 2001 . Huang, X.-D., Acero, A., Hon, H.-W. Spoken Language Processing: A Guide to Theory, Algorithm, and System Development. Prentice-Hall, Upper Saddle River, NJ, 2001."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.940796"},{"key":"e_1_3_2_1_15_1","volume-title":"Lip geometric features for human-computer interaction using bimodal speech recognition: comparison and analysis. Speech Communication, 43(1--2):1--16","author":"Kaynak M. N.","year":"2004","unstructured":"Kaynak , M. N. , Zhi , Q. , Cheok , A. D. , Sengupta , K. , Jiang , Z. , Chung , K. C. Lip geometric features for human-computer interaction using bimodal speech recognition: comparison and analysis. Speech Communication, 43(1--2):1--16 , 2004 . Kaynak, M. N., Zhi, Q., Cheok, A. D., Sengupta, K., Jiang, Z., Chung, K. C. Lip geometric features for human-computer interaction using bimodal speech recognition: comparison and analysis. Speech Communication, 43(1--2):1--16, 2004."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/1762222.1762257"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP.2001.962802"},{"issue":"1","key":"e_1_3_2_1_18_1","first-page":"123","article-title":"Bimodal speech recognition using robust feature extraction of lip movement under uncontrolled illumination conditions","volume":"14","author":"Lee J.-S.","year":"2004","unstructured":"Lee , J.-S. , Shim , S. H. , Kim , S. Y. , Park , C. H . Bimodal speech recognition using robust feature extraction of lip movement under uncontrolled illumination conditions . Telecommunications Review , 14 ( 1 ): 123 -- 134 , Feb. 2004 . Lee, J.-S., Shim, S. H., Kim, S. Y., Park, C. H. Bimodal speech recognition using robust feature extraction of lip movement under uncontrolled illumination conditions. Telecommunications Review, 14(1):123--134, Feb. 2004.","journal-title":"Telecommunications Review"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1994.389567"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2002.1021886"},{"key":"e_1_3_2_1_21_1","first-page":"502","article-title":"The intrinsic bimodality of speech communication and the synthesis of talking faces. In The Structure of Multimodal Dialogue II, Taylor, M. M., Nel, F., Bouwhuis, D. (eds.), John Benjamins, Amsterdam","volume":"485","author":"Beno\u00eet C","year":"2000","unstructured":"Beno\u00eet , C . The intrinsic bimodality of speech communication and the synthesis of talking faces. In The Structure of Multimodal Dialogue II, Taylor, M. M., Nel, F., Bouwhuis, D. (eds.), John Benjamins, Amsterdam , The Netherlands , 2000 , 485 -- 502 . Beno\u00eet, C. The intrinsic bimodality of speech communication and the synthesis of talking faces. In The Structure of Multimodal Dialogue II, Taylor, M. M., Nel, F., Bouwhuis, D. (eds.), John Benjamins, Amsterdam, The Netherlands, 2000, 485--502.","journal-title":"The Netherlands"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00056-9"},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. Conf. Australasian Computer Science","author":"Lewis T. W.","year":"2004","unstructured":"Lewis , T. W. , Powers , D. M. W. Sensor fusion weighting measures in audio-visual speech recognition . In Proc. Conf. Australasian Computer Science ( Dunedin, New Zealand , 2004 ), 305--314. Lewis, T. W., Powers, D. M. W. Sensor fusion weighting measures in audio-visual speech recognition. In Proc. Conf. Australasian Computer Science (Dunedin, New Zealand, 2004), 305--314."},{"key":"e_1_3_2_1_24_1","volume-title":"Hearing by Eye II: Advances in the Psychology of Speechreading and Audio-Visual Speech","author":"Munhall K.","year":"1998","unstructured":"Munhall , K. , Vatikiotis-Bateson , E. The moving face during speech communication . In Hearing by Eye II: Advances in the Psychology of Speechreading and Audio-Visual Speech , Campbell, R., Dodd, B., Burnham, D. (eds.), Psychology Press , Hove, UK , 1998 , 123--142. Munhall, K., Vatikiotis-Bateson, E. The moving face during speech communication. In Hearing by Eye II: Advances in the Psychology of Speechreading and Audio-Visual Speech, Campbell, R., Dodd, B., Burnham, D. (eds.), Psychology Press, Hove, UK, 1998, 123--142."},{"key":"e_1_3_2_1_25_1","volume-title":"Discrete-Time Signal Processing","author":"Oppenheim A. V.","year":"1999","unstructured":"Oppenheim , A. V. , Schafer , W. W. Discrete-Time Signal Processing . Prentice-Hall , Upper Saddle River, NJ, 1999 . Oppenheim, A. V., Schafer, W. W. Discrete-Time Signal Processing. Prentice-Hall, Upper Saddle River, NJ, 1999."},{"key":"e_1_3_2_1_26_1","volume-title":"Fundamentals of Electronic Image Processing","author":"Weeks","year":"1996","unstructured":"Weeks Jr ., A. R. Fundamentals of Electronic Image Processing . SPIE\/IEEE Press , Bellingham, WA , 1996 . Weeks Jr., A. R. Fundamentals of Electronic Image Processing. SPIE\/IEEE Press, Bellingham, WA, 1996."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1099-0720(199604)10:2<121::AID-ACP371>3.0.CO;2-V"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/89.848222"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/89.326616"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780198538493.001.0001","volume-title":"Neural Networks for Pattern Recognition","author":"Bishop C.","year":"1995","unstructured":"Bishop , C. Neural Networks for Pattern Recognition . Oxford University Press , UK , 1995 . Bishop, C. Neural Networks for Pattern Recognition. Oxford University Press, UK, 1995."}],"event":{"name":"ICMI07: International Conference on Multimodal Interface","sponsor":["ACM Association for Computing Machinery","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Nagoya Aichi Japan","acronym":"ICMI07"},"container-title":["Proceedings of the 9th international conference on Multimodal interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1322192.1322231","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1322192.1322231","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T13:38:59Z","timestamp":1750253939000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1322192.1322231"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,11,12]]},"references-count":30,"alternative-id":["10.1145\/1322192.1322231","10.1145\/1322192"],"URL":"https:\/\/doi.org\/10.1145\/1322192.1322231","relation":{},"subject":[],"published":{"date-parts":[[2007,11,12]]},"assertion":[{"value":"2007-11-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}