{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T14:42:29Z","timestamp":1774881749513,"version":"3.50.1"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319995786","type":"print"},{"value":"9783319995793","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-99579-3_13","type":"book-chapter","created":{"date-parts":[[2018,8,24]],"date-time":"2018-08-24T07:36:09Z","timestamp":1535096169000},"page":"113-122","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A Free Synthetic Corpus for Speaker Diarization Research"],"prefix":"10.1007","author":[{"given":"Erik","family":"Edwards","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Brenndoerfer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Amanda","family":"Robinson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Najmeh","family":"Sadoughi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Greg P.","family":"Finley","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Maxim","family":"Korenevsky","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nico","family":"Axtmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mark","family":"Miller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Suendermann-Oeft","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,25]]},"reference":[{"key":"13_CR1","doi-asserted-by":"crossref","unstructured":"Anguera Mir\u00f3, X.: Robust speaker diarization for meetings. Ph.D. thesis, Univ. Polit\u00e8cnica de Catalunya (2006)","DOI":"10.21437\/Interspeech.2006-466"},{"issue":"2","key":"13_CR2","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1109\/TASL.2011.2125954","volume":"20","author":"X Anguera Mir\u00f3","year":"2012","unstructured":"Anguera Mir\u00f3, X., Bozonnet, S., Evans, N., Fredouille, C., Friedland, G., Vinyals, O.: Speaker diarization: a review of recent research. IEEE Trans. Audio Speech Lang. Process. 20(2), 356\u2013370 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Anguera Mir\u00f3, X., Hernando Peric\u00e1s, F.: Evolutive speaker segmentation using a repository system. In: Proceedings of ICSLP, pp. 605\u2013608. ISCA (2004)","DOI":"10.21437\/Interspeech.2004-251"},{"key":"13_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/11677482_34","volume-title":"Machine Learning for Multimodal Interaction","author":"X Anguera","year":"2006","unstructured":"Anguera, X., Wooters, C., Peskin, B., Aguil\u00f3, M.: Robust speaker segmentation for meetings: the ICSI-SRI spring 2005 diarization system. In: Renals, S., Bengio, S. (eds.) MLMI 2005. LNCS, vol. 3869, pp. 402\u2013414. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11677482_34"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Bozonnet, S., Vipperla, R., Evans, N.: Phone adaptive training for speaker diarization. In: Proceedings of INTERSPEECH, pp. 494\u2013497. ISCA (2012)","DOI":"10.21437\/Interspeech.2012-166"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Burger, S., MacLaren, V., Yu, H.: The ISL meeting corpus: the impact of meeting type on speech style. In: Proceedings of ICSLP, pp. 301\u2013304. ISCA (2002)","DOI":"10.21437\/ICSLP.2002-140"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Chen, I.F., Cheng, S.S., Wang, H.M.: Phonetic subspace mixture model for speaker diarization. In: Proceedings of INTERSPEECH, pp. 2298\u20132301. ISCA (2010)","DOI":"10.21437\/Interspeech.2010-630"},{"key":"13_CR8","unstructured":"Delacourt, P., Kryze, D., Wellekens, C.: Speaker-based segmentation for audio data indexing. In: Proceedings of ESCA Tutorial and Research Workshop, pp. 78\u201383. ISCA (1999)"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Finley, G., et al.: An automated medical scribe for documenting clinical encounters. In: Proceedings of NAACL. ACL (2018)","DOI":"10.18653\/v1\/N18-5003"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Gangadharaiah, R., Narayanaswamy, B.: A novel method for two-speaker segmentation. In: Proceedings of ICSLP, pp. 2337\u20132340. ISCA (2004)","DOI":"10.21437\/Interspeech.2004-525"},{"key":"13_CR11","unstructured":"Garofolo, J., Laprun, C., Michel, M., Stanford, V., Tabassi, E.: The NIST meeting room pilot corpus. In: Proceedings of LREC, p. 4. ELRA (2004)"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Gauvain, J.L., Adda, G., Lamel, L., Adda-Decker, M.: Transcribing broadcast news: the LIMSI Nov96 Hub4 system. In: Proceedings of DARPA Speech Recognition Workshop, pp. 56\u201363. DARPA (1997)","DOI":"10.21437\/Eurospeech.1997-323"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Gish, H., Siu, M.H., Rohlicek, J.: Segregation of speakers for speech recognition and speaker identification. In: Proceedings of ICASSP, vol. 2, pp. 873\u2013876. IEEE (1991)","DOI":"10.1109\/ICASSP.1991.150477"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Godfrey, J., Holliman, E., McDaniel, J.: SWITCHBOARD: telephone speech corpus for research and development. In: Proceedings of ICASSP, vol. 1, pp. 517\u2013520. IEEE (1992)","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"13_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"344","DOI":"10.1007\/11677482_30","volume-title":"Machine Learning for Multimodal Interaction","author":"T Hain","year":"2006","unstructured":"Hain, T., et al.: The development of the AMI system for the transcription of speech in meetings. In: Renals, S., Bengio, S. (eds.) MLMI 2005. LNCS, vol. 3869, pp. 344\u2013356. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11677482_30"},{"issue":"4","key":"13_CR16","doi-asserted-by":"publisher","first-page":"555","DOI":"10.1016\/j.wocn.2010.08.002","volume":"38","author":"M Heldner","year":"2010","unstructured":"Heldner, M., Edlund, J.: Pauses, gaps and overlaps in conversations. J. Phon. 38(4), 555\u2013568 (2010)","journal-title":"J. Phon."},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Hsieh, C.H., Wu, C.H., Shen, H.P.: Adaptive decision tree-based phone cluster models for speaker clustering. In: Proceedings of INTERSPEECH, pp. 861\u2013864. ISCA (2008)","DOI":"10.21437\/Interspeech.2008-276"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Ikbal, S., Visweswariah, K.: Learning essential speaker sub-space using hetero-associative neural networks for speaker clustering. In: Proceedings of INTERSPEECH, pp. 28\u201331. ISCA (2008)","DOI":"10.21437\/Interspeech.2008-5"},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Janin, A., et al.: The ICSI meeting corpus. In: Proceedings of ICASSP, vol. 1, pp. 364\u2013367. IEEE (2003)","DOI":"10.1109\/ICASSP.2003.1198793"},{"issue":"4\u20135","key":"13_CR20","doi-asserted-by":"publisher","first-page":"667","DOI":"10.1016\/j.engappai.2009.01.012","volume":"22","author":"S Jothilakshmi","year":"2009","unstructured":"Jothilakshmi, S., Ramalingam, V., Palanivel, S.: Speaker diarization using autoassociative neural networks. Eng. Appl. Artif. Intell. 22(4\u20135), 667\u2013675 (2009)","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"3","key":"13_CR21","doi-asserted-by":"publisher","first-page":"1684","DOI":"10.1109\/TCE.2010.5606313","volume":"56","author":"K Kim","year":"2010","unstructured":"Kim, K., Kim, M.: Robust speaker recognition against background noise in an enhanced multi-condition domain. IEEE Trans. Consum. Electron. 56(3), 1684\u20131688 (2010)","journal-title":"IEEE Trans. Consum. Electron."},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Liu, C., Yan, Y.: Speaker change detection using minimum message length criterion. In: Proceedings of ICSLP, pp. 514\u2013517. ISCA (2000)","DOI":"10.21437\/ICSLP.2000-585"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Meinedo, H., Neto, J.: A stream-based audio segmentation, classification and clustering pre-processing system for broadcast news using ANN models. In: Proceedings of INTERSPEECH, pp. 237\u2013240. ISCA (2005)","DOI":"10.21437\/Interspeech.2005-117"},{"key":"13_CR24","unstructured":"Metzger, Y.: Blind segmentation of a multi-speaker conversation using two different sets of features. In: Proceedings of Odyssey Workshop, pp. 157\u2013162. ISCA (2001)"},{"issue":"10","key":"13_CR25","doi-asserted-by":"publisher","first-page":"1065","DOI":"10.1016\/j.specom.2012.05.002","volume":"54","author":"M Moattar","year":"2012","unstructured":"Moattar, M., Homayounpour, M.: A review on speaker diarization systems and approaches. Speech Commun. 54(10), 1065\u20131103 (2012)","journal-title":"Speech Commun."},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Mohammadi, S., Sameti, H., Langarani, M., Tavanaei, A.: KNNDIST: a non-parametric distance measure for speaker segmentation. In: Proceedings of INTERSPEECH, pp. 2282\u20132285. ISCA (2012)","DOI":"10.21437\/Interspeech.2012-599"},{"key":"13_CR27","unstructured":"NIST: Spring 2006 (RT-06S) Rich Transcription Meeting Recognition Evaluation plan. Report RT-06S, National Institute of Standards and Technology, Spring 2006"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: LibriSpeech: an ASR corpus based on public domain audio books. In: Proceedings of ICASSP, pp. 5206\u20135210. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"13_CR29","unstructured":"Povey, D., et al.: The Kaldi speech recognition toolkit. In: Proceedings of Workshop ASRU, Waikoloa Village, HI, p. 4. IEEE (2011)"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Rohlicek, J., et al.: Gisting conversational speech. In: Proceedings of ICASSP, vol. 2, pp. 113\u2013116. IEEE (1992)","DOI":"10.1109\/ICASSP.1992.226107"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Schindler, C., Draxler, C.: Using spectral moments as a speaker specific feature in nasals and fricatives. In: Proceedings of INTERSPEECH, pp. 2793\u20132796. ISCA (2013)","DOI":"10.21437\/Interspeech.2013-639"},{"key":"13_CR32","first-page":"125","volume-title":"Trends in Speech Recognition","author":"J Shoup","year":"1980","unstructured":"Shoup, J.: Phonological aspects of speech recognition. In: Lea, W. (ed.) Trends in Speech Recognition, pp. 125\u2013138. Prentice-Hall, Englewood Cliffs (1980)"},{"key":"13_CR33","unstructured":"Siegler, M., Jain, U., Raj, B., Stern, R.: Automatic segmentation, classification and clustering of broadcast news audio. In: Proceedings of DARPA Speech Recognition Workshop, pp. 97\u201399. DARPA (1997)"},{"key":"13_CR34","doi-asserted-by":"crossref","unstructured":"Siu, M.H., Yu, G., Gish, H.: An unsupervised, sequential learning algorithm for the segmentation of speech waveforms with multiple speakers. In: Proceedings of ICASSP, vol. 2, pp. 189\u2013192. IEEE (1992)","DOI":"10.1109\/ICASSP.1992.226088"},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Soldi, G., Bozonnet, S., Alegre, F., Beaugeant, C., Evans, N.: Short-duration speaker modelling with phone adaptive training. In: Proceedings of Odyssey Workshop, pp. 208\u2013215. ISCA (2014)","DOI":"10.21437\/Odyssey.2014-32"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"S\u00f6nmez, M., Heck, L., Weintraub, M.: Speaker tracking and detection with multiple speakers. In: Proceedings of EUROSPEECH, pp. 2219\u20132222. ISCA (1999)","DOI":"10.21437\/Eurospeech.1999-492"},{"issue":"26","key":"13_CR37","doi-asserted-by":"publisher","first-page":"10587","DOI":"10.1073\/pnas.0903616106","volume":"106","author":"T Stivers","year":"2009","unstructured":"Stivers, T., et al.: Universals and cultural variation in turn-taking in conversation. Proc. Natl. Acad. Sci U.S.A. 106(26), 10587\u201310592 (2009)","journal-title":"Proc. Natl. Acad. Sci U.S.A."},{"key":"13_CR38","doi-asserted-by":"crossref","unstructured":"Sugiyama, M., Murakami, J., Watanabe, H.: Speech segmentation and clustering based on speaker features. In: Proceedings of ICASSP, vol. 2, pp. 395\u2013398. IEEE (1993)","DOI":"10.1109\/ICASSP.1993.319322"},{"key":"13_CR39","doi-asserted-by":"crossref","unstructured":"Takagi, K., Itahashi, S.: Segmentation of spoken dialogue by interjections, disfluent utterances and pauses. In: Proceedings of ICSLP, pp. 697\u2013700. ISCA (1996)","DOI":"10.1109\/ICSLP.1996.607457"},{"key":"13_CR40","doi-asserted-by":"crossref","unstructured":"Valente, F., Wellekens, C.: Scoring unknown speaker clustering: VB vs. BIC. In: Proceedings of ICSLP, pp. 593\u2013596. ISCA (2004)","DOI":"10.21437\/Interspeech.2004-248"},{"key":"13_CR41","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1007\/978-3-319-49169-1_27","volume-title":"Advances in Speech and Language Technologies for Iberian Languages","author":"I Vi\u00f1als","year":"2016","unstructured":"Vi\u00f1als, I., Villalba, J., Ortega, A., Miguel, A., Lleida, E.: Bottleneck based front-end for diarization systems. In: Abad, A., et al. (eds.) IberSPEECH 2016. LNCS (LNAI), vol. 10077, pp. 276\u2013286. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-49169-1_27"},{"key":"13_CR42","doi-asserted-by":"crossref","unstructured":"Wang, G., Wu, X., Zheng, T.: Using phoneme recognition and text-dependent speaker verification to improve speaker segmentation for Chinese speech. In: Proceedings of INTERSPEECH, pp. 1457\u20131460. ISCA (2010)","DOI":"10.21437\/Interspeech.2010-148"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Wilcox, L., Chen, F., Kimber, D., Balasubramanian, V.: Segmentation of speech using speaker identification. In: Proceedings of ICASSP, vol. 1, pp. 161\u2013164. IEEE (1994)","DOI":"10.1109\/ICASSP.1994.389330"},{"key":"13_CR44","doi-asserted-by":"crossref","unstructured":"Yella, S., Motl\u00edcek, P., Bourlard, H.: Phoneme background model for information bottleneck based speaker diarization. In: Proceedings of INTERSPEECH, pp. 597\u2013601. ISCA (2014)","DOI":"10.1109\/ICASSP.2014.6853565"},{"key":"13_CR45","doi-asserted-by":"crossref","unstructured":"Yella, S., Stolcke, A., Slaney, M.: Artificial neural network features for speaker diarization. In: Proceedings of SLT Workshop, pp. 402\u2013406. IEEE (2014)","DOI":"10.1109\/SLT.2014.7078608"},{"issue":"11","key":"13_CR46","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1109\/LSP.2011.2169453","volume":"18","author":"L Z\u00e2o","year":"2011","unstructured":"Z\u00e2o, L., Coelho, R.: Colored noise based multicondition training technique for robust speaker identification. IEEE Signal Process. Lett. 18(11), 675\u2013678 (2011)","journal-title":"IEEE Signal Process. Lett."},{"key":"13_CR47","doi-asserted-by":"crossref","unstructured":"Zibert, J., Mihelic, F.: Prosodic and phonetic features for speaker clustering in speaker diarization systems. In: Proceedings of INTERSPEECH, pp. 1033\u20131036. ISCA (2011)","DOI":"10.21437\/Interspeech.2011-387"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99579-3_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T04:54:58Z","timestamp":1661835298000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319995786","9783319995793"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99579-3_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]}}}