{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T20:36:59Z","timestamp":1775335019179,"version":"3.50.1"},"reference-count":14,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008,6]]},"DOI":"10.1109\/cvprw.2008.4563183","type":"proceedings-article","created":{"date-parts":[[2008,7,17]],"date-time":"2008-07-17T23:40:22Z","timestamp":1216338022000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Speaker detection using the timing structure of lip motion and sound"],"prefix":"10.1109","author":[{"family":"Yu Horii","sequence":"first","affiliation":[]},{"family":"Hiroaki Kawashima","sequence":"additional","affiliation":[]},{"family":"Takashi Matsuyama","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"13","doi-asserted-by":"publisher","DOI":"10.1109\/ISCE.2005.1502369"},{"key":"14","year":"0"},{"key":"11","article-title":"multimodal speaker detection using error feedback dynamic bayesian networks","author":"pavlovic?","year":"2000","journal-title":"Proc Computer Vision and Pattern Recognition"},{"key":"12","first-page":"2","article-title":"learning switching linear models of human motion","author":"pavlovic?","year":"2000","journal-title":"Proc Neural Information Processing Systems"},{"key":"3","article-title":"active appearance model","author":"cootes","year":"1998","journal-title":"Proc European Conf Computer Vision"},{"key":"2","year":"0"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1987.4767965"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1109\/89.536930"},{"key":"7","article-title":"interval-based linear hybrid dynamical system for modeling crossmedia timing structures in multimedia signals","author":"kawashima","year":"2007","journal-title":"10th Int Conf Image Analysis and Processing"},{"key":"6","year":"0"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2003.1247172"},{"key":"4","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"maximum likelihood from incomplete data via the em algorithm","volume":"39","author":"dempster","year":"1977","journal-title":"J R Statist Soc B"},{"key":"9","article-title":"facial expression representation based on timing structures in faces","author":"nishiyama","year":"2005","journal-title":"IEEE International Workshop on Analysis and Modeling of Faces and Gestures (W Zhao et al (Eds ) AMFG 2005 LNCS 3723)"},{"key":"8","first-page":"1","article-title":"automatic video recording of lecture's audience with activity analysis and equalization of scale for students observation","volume":"181 189","author":"nishiguchi","year":"2004","journal-title":"Journal of Advanced Computational Intelligence and Intelligent Informatics"}],"event":{"name":"2008 IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops (CVPR Workshops)","location":"Anchorage, AK, USA","start":{"date-parts":[[2008,6,23]]},"end":{"date-parts":[[2008,6,28]]}},"container-title":["2008 IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4558053\/4562948\/04563183.pdf?arnumber=4563183","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T02:12:29Z","timestamp":1738289549000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4563183\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,6]]},"references-count":14,"URL":"https:\/\/doi.org\/10.1109\/cvprw.2008.4563183","relation":{},"subject":[],"published":{"date-parts":[[2008,6]]}}}