{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T04:10:03Z","timestamp":1738815003821,"version":"3.37.0"},"publisher-location":"Berlin, Heidelberg","reference-count":12,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540897958"},{"type":"electronic","value":"9783540897965"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89796-5_71","type":"book-chapter","created":{"date-parts":[[2008,12,9]],"date-time":"2008-12-09T16:29:15Z","timestamp":1228840155000},"page":"693-702","source":"Crossref","is-referenced-by-count":1,"title":["Speaker Clustering Aided by Visual Dialogue Analysis"],"prefix":"10.1007","author":[{"given":"Shuang","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Wei","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jia","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yimin","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"71_CR1","unstructured":"Reynolds, D.A., Torres Carrasquillo, P.: The MIT Lincoln Laboratory RT-04F Diarization Systems: Applications to Broadcast Audio and Telephone Conversations, RT-04F Workshop (2004)"},{"key":"71_CR2","unstructured":"Chen, S., Gopalakrishnan, P.: Speaker, environment and channel change detection and clustering via the Bayesian information criterion. In: DARPA speech recognition workshop (1998)"},{"key":"71_CR3","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/S0167-6393(00)00027-3","volume":"32","author":"P. Delacourt","year":"2000","unstructured":"Delacourt, P., Wellekens, C.: DISTBIC: A speaker-based segmentation for audio data indexing. Speech Communications\u00a032, 111\u2013126 (2000)","journal-title":"Speech Communications"},{"key":"71_CR4","unstructured":"Lehane, B., O\u2019Connor, N., Murphy, N.: Dialogue Scene Detection in Movies using Low and Mid-Level Visual Features. In: International Workshop on Image, Video, and Audio Retrieval and Mining, Quebec, Canada, 25-26 (2004)"},{"key":"71_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1007\/11526346_32","volume-title":"Image and Video Retrieval","author":"B. Lehane","year":"2005","unstructured":"Lehane, B., O\u2019Connor, N., Murphy, N.: Dialogue Sequence Detection in Movies. In: Leow, W.-K., Lew, M., Chua, T.-S., Ma, W.-Y., Chaisorn, L., Bakker, E.M. (eds.) CIVR 2005. LNCS, vol.\u00a03568, pp. 286\u2013296. Springer, Heidelberg (2005)"},{"key":"71_CR6","doi-asserted-by":"crossref","unstructured":"Chen, L., Rizvi, S.J., Otzu, M.T.: Incorporating audio cues into dialog and action scene detection. In: Proceedings of SPIE Conference on Storage and Retrieval for Media Databases, pp. 252\u2013264 (2003)","DOI":"10.1117\/12.476317"},{"key":"71_CR7","doi-asserted-by":"crossref","unstructured":"Zhai, Y., Rasheed, Z., Shah, M.: A framework for semantic classification of scenes using finite state machines. In: International Conference on Image and Video Retrieval (2004)","DOI":"10.1007\/978-3-540-27814-6_35"},{"key":"71_CR8","unstructured":"ESPS with waves, Entropic Research Laboratory, Inc. AT&T Bell Laboratories (1993)"},{"key":"71_CR9","doi-asserted-by":"crossref","unstructured":"Li, C., Ou, Z., Hu, W., Wang, T., Zhang, Y.: Caption-aided speech detection in videos. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 141\u2013144 (2008)","DOI":"10.1109\/ICASSP.2008.4517566"},{"key":"71_CR10","doi-asserted-by":"crossref","unstructured":"Reynolds, D.A., Torres-Carrasquillo, P.: Approaches and Applications of Audio Diarization. Special Session on HLT. In: IEEE International Conference on Acoustics, Speech and Signal Processing (2005)","DOI":"10.1109\/ICASSP.2005.1416463"},{"key":"71_CR11","unstructured":"Siegler, M.A., Jain, U., Raj, B., Stern, R.M.: Automatic Segmentation, Classification and Clustering of Broadcast News Audio. In: DARPA Speech Recognition Workshop, pp. 97\u201399 (1997)"},{"key":"71_CR12","doi-asserted-by":"crossref","unstructured":"Delacourt, P., Wellekens, C.: Audio data indexing: Use of second-order statistics for speaker-based segmentation. In: Proc. of the IEEE Int. Conf. on Multimedia Computing and Systems, Florence, Italy, vol.\u00a0II, pp. 959\u2013963 (1999)","DOI":"10.1109\/MMCS.1999.778619"}],"container-title":["Lecture Notes in Computer Science","Advances in Multimedia Information Processing - PCM 2008"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89796-5_71","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T03:00:29Z","timestamp":1738810829000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89796-5_71"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540897958","9783540897965"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89796-5_71","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}