{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T14:37:23Z","timestamp":1780411043297,"version":"3.54.1"},"reference-count":11,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009,4]]},"DOI":"10.1109\/icassp.2009.4960522","type":"proceedings-article","created":{"date-parts":[[2009,5,27]],"date-time":"2009-05-27T15:13:38Z","timestamp":1243437218000},"page":"4069-4072","source":"Crossref","is-referenced-by-count":35,"title":["Multi-modal speaker diarization of real-world meetings using compressed-domain video features"],"prefix":"10.1109","author":[{"given":"Gerald","family":"Friedland","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hayley","family":"Hung","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chuohao","family":"Yeo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2004.827503"},{"key":"2","first-page":"565","article-title":"speaker localisation using audio-visual synchrony: an empirical study","volume":"2728","author":"nock","year":"2003","journal-title":"Lecture Notes in Computer Science"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(98)00066-1"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1177\/009365085012002004"},{"key":"7","article-title":"the icsi rt07s speaker diarization system","author":"wooters","year":"2007","journal-title":"Proceedings of the Rich Transcription 2007 Meeting Recognition Evaluation Workshop"},{"key":"6","article-title":"the ami meeting corpus: a pre-announcement","author":"carletta","year":"2005","journal-title":"Joint Workshop on Machine Learning and Multimodal Interaction (MLMI)"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2006.283"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP.2006.285274"},{"key":"9","year":"0"},{"key":"8","article-title":"correlating audio-visual cues in a dominance estimation framework","author":"hung","year":"2008","journal-title":"CVPR Workshop on Human Communicative Behavior Analysis"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2007.1077"}],"event":{"name":"ICASSP 2009 - 2009 IEEE International Conference on Acoustics, Speech and Signal Processing","location":"Taipei, Taiwan","start":{"date-parts":[[2009,4,19]]},"end":{"date-parts":[[2009,4,24]]}},"container-title":["2009 IEEE International Conference on Acoustics, Speech and Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4912736\/4959496\/04960522.pdf?arnumber=4960522","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,17]],"date-time":"2017-03-17T16:40:40Z","timestamp":1489768840000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4960522\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,4]]},"references-count":11,"URL":"https:\/\/doi.org\/10.1109\/icassp.2009.4960522","relation":{},"subject":[],"published":{"date-parts":[[2009,4]]}}}