{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T11:29:26Z","timestamp":1765279766822},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2012,2,1]],"date-time":"2012-02-01T00:00:00Z","timestamp":1328054400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2012,2]]},"DOI":"10.1109\/tasl.2011.2160167","type":"journal-article","created":{"date-parts":[[2012,1,31]],"date-time":"2012-01-31T18:28:23Z","timestamp":1328034503000},"page":"436-446","source":"Crossref","is-referenced-by-count":20,"title":["Simultaneous Speech Detection With Spatial Features for Speaker Diarization"],"prefix":"10.1109","volume":"20","author":[{"given":"Martin","family":"Zelenak","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Carlos","family":"Segura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jordi","family":"Luque","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Javier","family":"Hernando","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/502512.502527"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367247"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1121\/1.392786"},{"key":"ref30","first-page":"21","article-title":"Qualcomm-ICSI-OGI features for ASR","author":"adami","year":"2002","journal-title":"Proc InterSpeech-ICSLP"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1007\/978-3-540-68585-2_44","article-title":"Progress in the AMIDA speaker diarization system for meeting data","volume":"4625 2008","author":"van leeuwen","year":"2008","journal-title":"Multimodal Technol Perception Humans"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2062507"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1007\/11965152_33","article-title":"The AMI speaker diarization system for NIST RT06s meeting data","volume":"4299 2006","author":"van leeuwen","year":"2006","journal-title":"Proc Mach Learn Multimodal Interaction"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"1063","DOI":"10.21437\/Interspeech.2009-326","article-title":"Speech Overlap Detection in a Two-Pass Speaker Diarization System","author":"huijbregts","year":"2009","journal-title":"Proc Interspeech'09"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"32","DOI":"10.21437\/Interspeech.2008-6","article-title":"Two's a crowd: Improving speaker diarization by automatically identifying and excluding overlapped speech","author":"boakye","year":"2008","journal-title":"Proc Interspeech'08"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.902460"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2003.818027"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"2302","DOI":"10.21437\/Interspeech.2010-631","article-title":"Overlap detection for speaker diarization by fusing spectral and spatial features","author":"zelenk","year":"2010","journal-title":"Proc Interspeech'10"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518619"},{"key":"ref17","article-title":"The Spectral Autocorrelation Peak Valley Ratio (SAPVR)&#x2014;A usable speech measure employed as a co-channel detection system","author":"yantorno","year":"2001","journal-title":"Proc IEEE Int Workshop Intell Signal Process (WISP)"},{"key":"ref18","first-page":"231","article-title":"Usable speech detection using linear predictive analysis&#x2014;A model based approach","author":"sundaram","year":"2003","journal-title":"Proc ISPACS"},{"key":"ref19","first-page":"231","article-title":"A robust method for speech signal time-delay estimation in reverberant rooms","author":"svaizer","year":"1997","journal-title":"Proc ICASSP'97"},{"key":"ref28","year":"0","journal-title":"The Rich Transcription 2009 Meeting Recognition Evaluation"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2001.1034599"},{"key":"ref27","article-title":"Automatic cluster complexity and quantity selection: Towards robust speaker diarization","author":"anguera","year":"2006","journal-title":"Proc Speaker Odyssey"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2007.4430194"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2004.838531"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"383","DOI":"10.21437\/Interspeech.2008-154","article-title":"Clustering initialization based on spatial information for speaker diarization of meetings","author":"luque","year":"2008","journal-title":"Proc Interspeech'08"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"973","DOI":"10.21437\/Interspeech.2004-350","article-title":"Crosscorrelation-based multispeaker speech activity detection","author":"laskowski","year":"2004","journal-title":"Proc InterSpeech-ICSLP"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"2522","DOI":"10.21437\/Interspeech.2008-625","article-title":"Multi-speaker meeting audio segmentation","author":"nwe","year":"2008","journal-title":"Proc Interspeech'08"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660190"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"1781","DOI":"10.21437\/Interspeech.2005-3","article-title":"Spontaneous speech: How people really talk and why engineers should care","author":"shriberg","year":"2005","journal-title":"Proc Interspeech'05"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2003.1221388"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"1359","DOI":"10.21437\/Eurospeech.2001-352","article-title":"Observations on overlap: Findings and implications for automatic processing of multi-party conversation","volume":"2","author":"shriberg","year":"2001","journal-title":"Proc Eurospeech'01"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1997.599651"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4517554"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1007\/978-3-540-68585-2_50","article-title":"Speaker diarization for conference room: The UPC RT07s evaluation system","volume":"4625 2008","author":"luque","year":"2008","journal-title":"Multimodal Technol Perception Humans"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/83.855432"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0075-7"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2003.1318476"},{"key":"ref25","first-page":"1849","article-title":"Improved location features for meeting speaker diarization","author":"otterson","year":"2007","journal-title":"Proc Interspeech'07"}],"container-title":["IEEE Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/10376\/6099652\/06136544.pdf?arnumber=6136544","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,14]],"date-time":"2023-06-14T05:48:15Z","timestamp":1686721695000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6136544\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,2]]},"references-count":35,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tasl.2011.2160167","relation":{},"ISSN":["1558-7916","1558-7924"],"issn-type":[{"value":"1558-7916","type":"print"},{"value":"1558-7924","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,2]]}}}