{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,19]],"date-time":"2025-03-19T14:41:07Z","timestamp":1742395267115},"reference-count":11,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1109\/icassp.2003.1200085","type":"proceedings-article","created":{"date-parts":[[2004,1,24]],"date-time":"2004-01-24T04:33:03Z","timestamp":1074918783000},"page":"V-772-5","source":"Crossref","is-referenced-by-count":8,"title":["Audio-visual synchrony for detection of monologues in video archives"],"prefix":"10.1109","volume":"5","author":[{"given":"G.","family":"Iyengar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"H.J.","family":"Nock","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"C.","family":"Neti","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP.1999.793814"},{"key":"ref3","article-title":"Assessing face and speech consistency for monologue detectionin video","author":"nock","year":"2002","journal-title":"Proc ACM Multimedia"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.1109\/ICASSP.2002.1005673","article-title":"Robust Speech Recognition in Noisy Environments: The IBM Spine-2 Evaluation System","author":"kingsbury","year":"2002","journal-title":"Proc ICASSP"},{"key":"ref6","article-title":"Learning Joint Statistical Models for Audio-Visual Fusion and Segregation","author":"fisher","year":"2001","journal-title":"Proc NIPS"},{"key":"ref11","article-title":"Speaker, environment and channel change detection and clustering via the bayesian information criterion","author":"chen","year":"1998","journal-title":"Proc Intl Conf Acoust Sp & Sig Proc"},{"key":"ref5","article-title":"Using audio-visual synchrony to locate sounds","author":"hershey","year":"1999","journal-title":"Proc NIPS"},{"key":"ref8","article-title":"Facesync: a linear operator for measuring synchronization of video facial images and audio tracks","author":"slaney","year":"2001","journal-title":"Proc NIPS"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1109\/ICASSP.2002.5745560","article-title":"Informative subspaces for audiovisual processing: High-level function from low-level fusion","author":"fisher","year":"2002","journal-title":"Proc ICASSP"},{"key":"ref2","article-title":"Look Who's Talking: Speaker Detection using Video and Audio Correlation","author":"cutler","year":"2000","journal-title":"Proc JCME"},{"key":"ref9","article-title":"Audio-visual speech recognition","author":"neti","year":"2000","journal-title":"CLSP Summer Workshop Tech Rep WS00AVSR"},{"year":"0","key":"ref1","article-title":"Text retrieval conference (tree) video track"}],"event":{"name":"International Conference on Acoustics, Speech and Signal Processing (ICASSP'03)","acronym":"ICASSP-03","location":"Hong Kong, China"},"container-title":["2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings. (ICASSP '03)."],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/8535\/26996\/01200085.pdf?arnumber=1200085","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,16]],"date-time":"2017-06-16T03:18:10Z","timestamp":1497583090000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/1200085\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"references-count":11,"URL":"https:\/\/doi.org\/10.1109\/icassp.2003.1200085","relation":{},"subject":[]}}