{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:32:47Z","timestamp":1750307567984,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2009,11,2]],"date-time":"2009-11-02T00:00:00Z","timestamp":1257120000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2009,11,2]]},"DOI":"10.1145\/1647314.1647327","type":"proceedings-article","created":{"date-parts":[[2009,11,10]],"date-time":"2009-11-10T18:36:45Z","timestamp":1257878205000},"page":"55-62","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["A speaker diarization method based on the probabilistic fusion of audio-visual location information"],"prefix":"10.1145","author":[{"given":"Kentaro","family":"Ishizuka","sequence":"first","affiliation":[{"name":"NTT Communication Science Laboratories, NTT Corporation, Kyoto, Japan"}]},{"given":"Shoko","family":"Araki","sequence":"additional","affiliation":[{"name":"NTT Communication Science Laboratories, NTT Corporation, Kyoto, Japan"}]},{"given":"Kazuhiro","family":"Otsuka","sequence":"additional","affiliation":[{"name":"NTT Communication Science Laboratories, NTT Corporation, Atsugi, Japan"}]},{"given":"Tomohiro","family":"Nakatani","sequence":"additional","affiliation":[{"name":"NTT Communication Science Laboratories, NTT Corporation, Kyoto, Japan"}]},{"given":"Masakiyo","family":"Fujimoto","sequence":"additional","affiliation":[{"name":"NTT Communication Science Laboratories, NTT Corporation, Kyoto, Japan"}]}],"member":"320","published-online":{"date-parts":[[2009,11,2]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/11677482_3"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-007-9054-4"},{"volume-title":"The Rich Transcription 2007 meeting recognition evaluation. Multimodal Technologies for Perception of Humans, Stifelhagen, R., Bowers, R., and Fiscus, J. (Eds.), LNCS 4625","author":"Fiscus J. G.","key":"e_1_3_2_1_3_1"},{"volume-title":"Proc. Int. Conf. Acoust., Speech, Signal Process., 597--600","author":"Waibel A.","key":"e_1_3_2_1_4_1"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/641007.641112"},{"key":"e_1_3_2_1_6_1","first-page":"740","article-title":"Meetings about meetings: Research at ICSI on speech multiparty conversations","volume":"4","author":"Morgan N.","year":"2003","journal-title":"Proc. Int. Conf. Acoust., Speech, Signal Process."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1322192.1322210"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.878256"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68585-2_47"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68585-2_44"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68585-2_46"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68585-2_50"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/11965152_35"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68585-2_48"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/11677482_29"},{"volume-title":"Proc. Int. Conf. Acoust., Speech, Signal Process., 237--240","author":"Martin A.","key":"e_1_3_2_1_16_1"},{"key":"e_1_3_2_1_17_1","first-page":"557","article-title":"Robust speech activity detection using LDA applied to FF parameters","volume":"1","author":"Padrell J.","year":"2005","journal-title":"Proc. Int. Conf. Acoust., Speech, Signal Process."},{"volume-title":"Proc. INTERSPEECH, 501--504","author":"Armani L.","key":"e_1_3_2_1_18_1"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/11677482_40"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.902460"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/11965152_23"},{"volume-title":"Proc. INTERSPEECH, 2194--2197","author":"Pardo J. M.","key":"e_1_3_2_1_22_1"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1976.1162830"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1452392.1452446"},{"volume-title":"Proc. Int. Conf. Acoust., Speech, Signal Process., 4441--4444","author":"Fujimoto M.","key":"e_1_3_2_1_25_1"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4959513"},{"key":"e_1_3_2_1_27_1","unstructured":"Mateo Lozano O. and Otsuka K. in press. Simultaneous and fast 3D tracking of multiple faces in video sequences by using a particle filter. J. Signal Process. Systems DOI 10.1007\/s11265-008-0250-2.  Mateo Lozano O. and Otsuka K. in press. Simultaneous and fast 3D tracking of multiple faces in video sequences by using a particle filter. J. Signal Process. Systems DOI 10.1007\/s11265-008-0250-2."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-85853-9_8"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.881678"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2005.49"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1155\/S1110865704402303"},{"volume-title":"Proc. Int. Conf. Acoust., Speech, Signal Process., 2221--2224","author":"Ba S. O.","key":"e_1_3_2_1_32_1"},{"key":"e_1_3_2_1_33_1","first-page":"685","article-title":"Real-time monitoring of participants' interaction in a meeting using audio-visual sensors","volume":"2","author":"Busso C.","year":"2007","journal-title":"Proc. Int. Conf. Acoust., Speech, Signal Process."},{"volume-title":"Proc. Joint Workshop Hands-free Speech Commun. Microphone Arrays, 119--123","author":"Potamianos G.","key":"e_1_3_2_1_34_1"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Ishizuka K. Araki S. and Kawahara T. 2008. Statistical speech activity detection based on spatial power distribution for analyses of poster presentations \" Proc. INTERSPEECH 99--102.  Ishizuka K. Araki S. and Kawahara T. 2008. Statistical speech activity detection based on spatial power distribution for analyses of poster presentations \" Proc. INTERSPEECH 99--102.","DOI":"10.21437\/Interspeech.2008-22"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960522"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/97.736233"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.1781622"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2004.828896"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164453"},{"volume-title":"Proc. Joint Workshop Hands-free Speech Commun. Microphone Arrays, 29--32","author":"Araki S.","key":"e_1_3_2_1_41_1"},{"volume-title":"Proc. IEEE Conf. Computer Vision Pattern Recognition, 999--1006","author":"Mikami D.","key":"e_1_3_2_1_42_1"}],"event":{"name":"ICMI-MLMI '09: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERFACES\/WORKSHOP ON MACHINE LEARNING FOR MULTIMODAL INTERFACES","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Cambridge Massachusetts USA","acronym":"ICMI-MLMI '09"},"container-title":["Proceedings of the 2009 international conference on Multimodal interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1647314.1647327","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1647314.1647327","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T12:40:57Z","timestamp":1750250457000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1647314.1647327"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,11,2]]},"references-count":42,"alternative-id":["10.1145\/1647314.1647327","10.1145\/1647314"],"URL":"https:\/\/doi.org\/10.1145\/1647314.1647327","relation":{},"subject":[],"published":{"date-parts":[[2009,11,2]]},"assertion":[{"value":"2009-11-02","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}