{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,7,2]],"date-time":"2023-07-02T06:10:11Z","timestamp":1688278211555},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2013,6,5]],"date-time":"2013-06-05T00:00:00Z","timestamp":1370390400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2013,12]]},"DOI":"10.1007\/s10772-013-9199-z","type":"journal-article","created":{"date-parts":[[2013,6,4]],"date-time":"2013-06-04T13:49:08Z","timestamp":1370353748000},"page":"513-523","source":"Crossref","is-referenced-by-count":1,"title":["A new approach of speaker clustering based on the stereophonic differential energy"],"prefix":"10.1007","volume":"16","author":[{"given":"S.","family":"Ouamour","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"H.","family":"Sayoud","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,6,5]]},"reference":[{"key":"9199_CR1","unstructured":"Ajmera, J., Bourlard, H., & Lapidot, I. (2002). Improved unknown-multiple speaker clustering using HMM (Technical report). IDIAP."},{"key":"9199_CR2","first-page":"605","volume":"1","author":"J. Ajmera","year":"2004","unstructured":"Ajmera, J., Lathoud, G., & McCowan, I. (2004). Clustering and segmenting speakers and their locations in meetings. ICASSP Proceedings, 1, 605\u2013608.","journal-title":"ICASSP Proceedings"},{"key":"9199_CR3","volume-title":"Proc. fall rich transcription workshop (RT-04)","author":"C. Barras","year":"2004","unstructured":"Barras, C., Zhu, X., Meignier, S., & Gauvain, J.-L. (2004). Improving speaker diarization. In Proc. fall rich transcription workshop (RT-04), Palisades, NY, Nov.\u00a02004."},{"key":"9199_CR4","volume-title":"Proceedings of the international conference on spoken language processing (ICSLP4)","author":"M. Ben","year":"2004","unstructured":"Ben, M., Betser, M., Bimbot, F., & Gravier, G. (2004). Speaker diarization using bottom-up clustering based on a parameter-derived distance between adapted GMMs. In Proceedings of the international conference on spoken language processing (ICSLP4), Jeju Islands, South Korea, October 2004."},{"issue":"1\u20132","key":"9199_CR5","doi-asserted-by":"crossref","first-page":"177","DOI":"10.1016\/0167-6393(95)00013-E","volume":"17","author":"F. Bimbot","year":"1995","unstructured":"Bimbot, F., Magrin-Chagnolleau, I., & Mathan, L. (1995). Second-order statistical measures for text-independent broadcaster identification. Speech Communication, 17(1\u20132), 177\u2013192.","journal-title":"Speech Communication"},{"key":"9199_CR6","volume-title":"Inter-noise proceedings, international congress and exhibition on noise control engineering","author":"O. H. Bjor","year":"2001","unstructured":"Bjor, O. H., Enger, J., & Winsvold, B. (2001). Sound intensity for identification of aircraft noise. In Inter-noise proceedings, international congress and exhibition on noise control engineering."},{"key":"9199_CR7","first-page":"357","volume-title":"Actes du 4\u00e8me congr\u00e8s Fran\u00e7ais d\u2019Acoustique","author":"J. F. Bonastre","year":"1997","unstructured":"Bonastre, J. F., & Besacier, L. (1997). Traitement ind\u00e9pendant de sous-bandes fr\u00e9quentielles par des m\u00e9thodes statistiques du second ordre pour la reconnaissance du locuteur. In Actes du 4\u00e8me congr\u00e8s Fran\u00e7ais d\u2019Acoustique, Marseille, France, 14\u201318 April 1997 (pp. 357\u2013360)."},{"key":"9199_CR8","first-page":"645","volume-title":"Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191998","author":"S. S. Chen","year":"1998","unstructured":"Chen, S. S., & Gopalakrishnan, P. (1998). Clustering via the Bayesian information criterion with applications in speech recognition. In Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191998, Seattle, USA (Vol.\u00a02, pp. 645\u2013648)."},{"key":"9199_CR9","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/S0167-6393(00)00027-3","volume":"32","author":"P. Delacourt","year":"2000","unstructured":"Delacourt, P., & Wellekens, C. J. (2000). DISTBIC: a\u00a0speaker-based segmentation for audio data indexing. Speech Communication, 32, 111\u2013126.","journal-title":"Speech Communication"},{"key":"9199_CR10","first-page":"873","volume-title":"Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191991","author":"H. Gish","year":"1991","unstructured":"Gish, H., Siu, M.-H., & Rohlicek, R. (1991). Segregation of speakers for speech recognition and speaker identification. In Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191991, Toronto, Canada (Vol.\u00a02, pp. 873\u2013876)."},{"key":"9199_CR11","series-title":"Lecture notes in computer sciences (LNCS)","volume-title":"NIST rich transcription\u00a02005 spring meeting recognition evaluation (RT\u201905S)","author":"F. Istrate","year":"2005","unstructured":"Istrate, F., Fredouille, C., Meignier, S., Besacier, L., & Bonastre, J.-F. (2005). Pre-processing techniques and speaker diarization on multiple microphone meetings. In NIST rich transcription\u00a02005 spring meeting recognition evaluation (RT\u201905S), Lecture notes in computer sciences (LNCS), Edinburgh, Scotland. Berlin: Springer."},{"key":"9199_CR12","unstructured":"Jacobsen, F. (2002). Sound intensity and its measurement and applications. Acoustic Technology, Technical, University of Denmark, Lyngby, Denmark. Note Number 2216."},{"key":"9199_CR13","first-page":"108","volume-title":"DARPA speech recognition workshop","author":"H. Jin","year":"1997","unstructured":"Jin, H., Kubala, F., & Schwartz, R. (1997). Automatic speaker clustering. In DARPA speech recognition workshop, Chantilly, USA (pp. 108\u2013111)."},{"key":"9199_CR14","volume-title":"NIST 2004 spring rich transcription evaluation workshop","author":"Q. Jin","year":"2004","unstructured":"Jin, Q., Laskowski, K., Schultz, T., & Waibel, A. (2004). Speaker segmentation and clustering in meetings. In NIST 2004 spring rich transcription evaluation workshop, Montreal, Canada."},{"key":"9199_CR15","first-page":"1775","volume-title":"Proc. international conference on speech and language processing","author":"S. Johnson","year":"1998","unstructured":"Johnson, S., & Woodland, P. (1998). Speaker clustering using direct maximization of the MLLRadapted likelihood. In Proc. international conference on speech and language processing, Sydney, Australia, Dec. 1998 (Vol.\u00a05, pp. 1775\u20131779)."},{"key":"9199_CR16","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1007\/978-3-540-68585-2_45","volume-title":"NIST RT 2007 evaluation in multimodal technologies for perception of humans","author":"E. C. Koh","year":"2008","unstructured":"Koh, E. C., Sun, H., Nwe, T. L., Nguyen, T. H., Ma, B., Chng, E.-S., Li, H., & Rahardja, S. (2008). Speaker diarization using direction of arrival estimate and acoustic feature information, the I2R-NTU submission. In NIST RT 2007 evaluation in multimodal technologies for perception of humans (pp. 484\u2013496). Berlin\/Heidelberg: Springer."},{"key":"9199_CR17","doi-asserted-by":"crossref","first-page":"337","DOI":"10.21437\/Eurospeech.1995-90","volume-title":"Proceedings of EUROSPEECH 95","author":"I. Magrin-Chagnolleau","year":"1995","unstructured":"Magrin-Chagnolleau, I., Bonastre, J. F., & Bimbot, F. (1995). Effect of utterance duration and phonetic content on speaker identification using second-order statistical methods. In Proceedings of EUROSPEECH 95, Madrid, Spain, September 1995 (Vol.\u00a01, pp. 337\u2013340)."},{"key":"9199_CR18","unstructured":"Meignier, S. (2002). Indexation en locuteurs de documents sonores: segmentation d\u2019un document et appariement d\u2019une collection. PhD Thesis, Laboratoire Informatique d\u2019Avignon (LIA), Universit\u00e9 d\u2019Avignon et des Pays de Vaucluse, Avignon (France)."},{"key":"9199_CR19","first-page":"85","volume-title":"Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20192003","author":"Y. Moh","year":"2003","unstructured":"Moh, Y., Nguyen, P., & Junqua, J.-C. (2003). Towards domain independent speaker clustering. In Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20192003, Hong Kong, China (pp. 85\u201388)."},{"key":"9199_CR20","first-page":"413","volume-title":"Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20192001","author":"K. Mori","year":"2001","unstructured":"Mori, K., & Nakagawa, S. (2001). Speaker change detection and speaker clustering using VQ distortion for broadcast news speech recognition. In Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20192001, Salt Lake City, USA (pp. 413\u2013416)."},{"key":"9199_CR21","doi-asserted-by":"crossref","first-page":"676","DOI":"10.1109\/ICASSP.1993.319401","volume-title":"Proc. IEEE\u00a0international conference on acoustics, speech and signal processing, ICASSP\u20191993","author":"S. Nakagawa","year":"1993","unstructured":"Nakagawa, S., & Suzuki, H. (1993). A\u00a0new speech recognition method based on VQ-distortion and HMM. In Proc. IEEE\u00a0international conference on acoustics, speech and signal processing, ICASSP\u20191993, Minneapolis, USA (pp. 676\u2013679)."},{"issue":"3","key":"9199_CR22","first-page":"104","volume":"5","author":"S. Ouamour","year":"2009","unstructured":"Ouamour, S., & Sayoud, H. (2009). A new approach for speaker change detection using a fusion of different classifiers and a new relative characteristic. The Mediterranean Journal of Computers and Networks, 5(3), 104\u2013113. ISSN:1744-2400","journal-title":"The Mediterranean Journal of Computers and Networks"},{"key":"9199_CR23","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1007\/s10772-012-9132-x","volume":"15","author":"S. Ouamour","year":"2012","unstructured":"Ouamour, S., & Sayoud, H. (2012). A pertinent learning machine input feature for speaker discrimination by voice. International Journal of Speech Technology, 15, 181\u2013190.","journal-title":"International Journal of Speech Technology"},{"key":"9199_CR24","volume-title":"Rich transcription workshop (RTW\u201904)","author":"D. A. Reynolds","year":"2004","unstructured":"Reynolds, D. A., & Torres-Carrasquillo, P. (2004). The MIT Lincoln laboratories RT-04F diarization systems: applications to broadcast audio and telephone conversations. In Rich transcription workshop (RTW\u201904), Palisades, NY."},{"key":"9199_CR25","volume-title":"Proc. international conference on speech and language processing","author":"D. A. Reynolds","year":"1998","unstructured":"Reynolds, D. A., Singer, E., Carlson, B. A., O\u2019Leary, G. C., McLaughlin, J. J., & Zixxman, M. A. (1998). Blind clustering of speech utterances based on speaker and language characteristics. In Proc. international conference on speech and language processing, Sidney, Australia."},{"key":"9199_CR26","volume-title":"Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20192006","author":"J. Rougui","year":"2006","unstructured":"Rougui, J., Rziza, M., Aboutajdine, D., Gelgon, M., & Martinez, J. (2006). Fast incremental clustering of Gaussian mixture speaker models for scaling up retrieval in on-line broadcast. In Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20192006, Toulouse, France (Vol.\u00a05)."},{"issue":"4","key":"9199_CR27","first-page":"702","volume":"89","author":"H. Sayoud","year":"2003","unstructured":"Sayoud, H., Ouamour, S., & Boudraa, M. (2003). \u2018ASTRA\u2019 an automatic speaker tracking system based on SOSM measures and an interlaced indexation. Acta Acustica, 89(4), 702\u2013710.","journal-title":"Acta Acustica"},{"issue":"1","key":"9199_CR28","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1108\/17563781111115787","volume":"4","author":"H. Sayoud","year":"2011","unstructured":"Sayoud, H., Ouamour, S., & Khennouf, S. (2011). Automatic speaker tracking by camera using two-channel-based sound source localization. International Journal of Intelligent Computing and Cybernetics, 4(1), 40\u201360.","journal-title":"International Journal of Intelligent Computing and Cybernetics"},{"key":"9199_CR29","volume-title":"Research abstracts\u20142007, research project. MIT CSAIL publications and digital archives","author":"K. Schutte","year":"2007","unstructured":"Schutte, K., & Glass, J. (2007) Features and classifiers for robust automatic speech recognition. In Research abstracts\u20142007, research project. MIT CSAIL publications and digital archives."},{"key":"9199_CR30","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1214\/aos\/1176344136","volume":"6","author":"G. Schwarz","year":"1978","unstructured":"Schwarz, G. (1978). Estimating the dimension of a model. The Annals of Statistics, 6, 461\u2013464.","journal-title":"The Annals of Statistics"},{"key":"9199_CR31","volume-title":"Odyssey workshop on speaker and language recognition","author":"E. Singer","year":"2012","unstructured":"Singer, E., Torres-Carrasquillo, P., Reynolds, D., McCree, A., Richardson, F., Dehak, N., & Sturim, D. (2012). The MITLL NIST LRE 2011 language recognition system. In Odyssey workshop on speaker and language recognition, Singapore, 26 June 2012."},{"key":"9199_CR32","first-page":"189","volume-title":"Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191992","author":"M.-H. Siu","year":"1992","unstructured":"Siu, M.-H., Yu, G., & Gish, H. (1992). An unsupervised, sequential learning algorithm for the segmentation of speech waveforms with multiple speakers. In Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191992, San Francisco, USA (Vol.\u00a02, pp. 189\u2013192)."},{"key":"9199_CR33","first-page":"757","volume-title":"Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191998","author":"A. Solomonov","year":"1998","unstructured":"Solomonov, A., Mielke, A., Schmidt, M., & Gish, H. (1998). Clustering speakers by their voices. In Proc. IEEE international conference on acoustics, speech and signal processing, ICASSP\u20191998, Seattle, USA (Vol.\u00a02, pp. 757\u2013760)."},{"key":"9199_CR34","volume-title":"Proc. ISCA Odyssey 2004 workshop on speaker and language recognition","author":"S. Tranter","year":"2004","unstructured":"Tranter, S., & Reynolds, D. (2004). Speaker diarisation for broadcast news. In Proc. ISCA Odyssey 2004 workshop on speaker and language recognition, Toledo, June 2004."},{"key":"9199_CR35","unstructured":"Valente, F. (2005). Variational Bayesian methods for audio indexing. PhD Thesis, Universit\u00e9 de Nice-Sophia Antipolis."},{"key":"9199_CR36","first-page":"207","volume-title":"Odyssey\u20192004, the speaker and language recognition workshop","author":"F. Valente","year":"2004","unstructured":"Valente, F., & Wellekens, C. J. (2004). Variational Bayesian speaker clustering. In Odyssey\u20192004, the speaker and language recognition workshop, Toledo, Spain (pp. 207\u2013214)."},{"key":"9199_CR37","doi-asserted-by":"crossref","first-page":"4954","DOI":"10.1109\/ICASSP.2010.5495087","volume-title":"Proceedings of the IEEE international conference on acoustics, speech, and signal processing, ICASSP\u20192010","author":"F. Valente","year":"2010","unstructured":"Valente, F., Motlicek, P., & Vijayasenan, D. (2010). Variational Bayesian speaker diarization of meeting recordings. In Proceedings of the IEEE international conference on acoustics, speech, and signal processing, ICASSP\u20192010, Dallas, Texas, USA (pp. 4954\u20134957)."},{"key":"9199_CR38","volume-title":"IEEE int. conf. on acoustics, speech, and signal processing (ICASSP)","author":"D. Vijayasenan","year":"2008","unstructured":"Vijayasenan, D., Valente, F., & Bourlard, H. (2008). Combination of agglomerative and sequential clustering for speaker diarization. In IEEE int. conf. on acoustics, speech, and signal processing (ICASSP)."},{"key":"9199_CR39","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1007\/978-3-540-72847-4_71","volume-title":"Proceedings of the 3rd Iberian conference on pattern recognition and image analysis","author":"W. Wang","year":"2007","unstructured":"Wang, W., Lv, P., Zhao, Q., & Yan, Y. (2007). A decision-tree-based online speaker clustering. In Proceedings of the 3rd Iberian conference on pattern recognition and image analysis, Girona, Spain (pp. 555\u2013562)."},{"key":"9199_CR40","first-page":"639","volume-title":"Proceedings of the 6th international symposium on neural networks: advances in neural networks, China section: clustering and classification","author":"H. Wang","year":"2009","unstructured":"Wang, H., Zhang, X., Suo, H., Zhao, Q., & Yan, Y. (2009). A novel fuzzy-based automatic speaker clustering algorithm. In Proceedings of the 6th international symposium on neural networks: advances in neural networks, China section: clustering and classification, Wuhan (pp. 639\u2013646)."},{"key":"9199_CR41","unstructured":"Xavier, A. M. (2006). Robust speaker diarization for meetings. PhD Thesis, Speech Processing Group Department of Signal Theory and Communications Universitat Politecnica de Catalunya Barcelona (Espagnol), October 2006."},{"key":"9199_CR42","first-page":"714","volume-title":"Proc. international conference on speech and language processing","author":"B. Zhou","year":"2000","unstructured":"Zhou, B., & Hansen, J. H. (2000). Unsupervised audio stream segmentation and clustering via the Bayesian information criterion. In Proc. international conference on speech and language processing, Beijing, China (Vol.\u00a03, pp. 714\u2013717)."},{"key":"9199_CR43","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1007\/978-3-642-04208-9_31","volume-title":"Proceedings of the 12th international conference on text, speech and dialogue","author":"J. \u017dibert","year":"2009","unstructured":"\u017dibert, J., & Miheli\u010d, F. (2009). Fusion of acoustic and prosodic features for speaker clustering. In Proceedings of the 12th international conference on text, speech and dialogue, Pilsen, Czech Republic (pp. 210\u2013217)."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-013-9199-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-013-9199-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-013-9199-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,2]],"date-time":"2023-07-02T05:33:43Z","timestamp":1688276023000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-013-9199-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,6,5]]},"references-count":43,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2013,12]]}},"alternative-id":["9199"],"URL":"https:\/\/doi.org\/10.1007\/s10772-013-9199-z","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,6,5]]}}}