{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T18:07:35Z","timestamp":1773079655615,"version":"3.50.1"},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2011,7,3]],"date-time":"2011-07-03T00:00:00Z","timestamp":1309651200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Evolving Systems"],"published-print":{"date-parts":[[2011,9]]},"DOI":"10.1007\/s12530-011-9034-1","type":"journal-article","created":{"date-parts":[[2011,7,4]],"date-time":"2011-07-04T05:29:19Z","timestamp":1309757359000},"page":"199-214","source":"Crossref","is-referenced-by-count":4,"title":["Adaptive systems for unsupervised speaker tracking and speech recognition"],"prefix":"10.1007","volume":"2","author":[{"given":"Tobias","family":"Herbig","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Franz","family":"Gerl","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wolfgang","family":"Minker","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Reinhold","family":"Haeb-Umbach","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2011,7,3]]},"reference":[{"issue":"8","key":"9034_CR1","doi-asserted-by":"crossref","first-page":"649","DOI":"10.1109\/LSP.2004.831666","volume":"11","author":"J Ajmera","year":"2004","unstructured":"Ajmera J, McCowan I, Bourlard H (2004) Robust speaker change detection. IEEE Signal Process Lett 11(8):649\u2013651","journal-title":"IEEE Signal Process Lett"},{"issue":"2","key":"9034_CR2","doi-asserted-by":"crossref","first-page":"498","DOI":"10.1109\/TASL.2006.881689","volume":"15","author":"P Angkititrakul","year":"2007","unstructured":"Angkititrakul P, Hansen JHL (2007) Discriminative in-set\/out-of-set speaker recognition. IEEE Trans Audio Speech Lang Process 15(2):498\u2013508","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"9034_CR3","doi-asserted-by":"crossref","unstructured":"Botterweck H (2001) Anisotropic MAP defined by eigenvoices for large vocabulary continuous speech recognition. In: IEEE international conference on acoustics, speech, and signal processing, ICASSP-2001, vol 1, pp 353\u2013356","DOI":"10.1109\/ICASSP.2001.940840"},{"issue":"9","key":"9034_CR4","doi-asserted-by":"crossref","first-page":"1437","DOI":"10.1109\/5.628714","volume":"85","author":"JP Campbell","year":"1997","unstructured":"Campbell JP (1997) Speaker recognition\u2014a tutorial. Proc IEEE 85(9):1437\u20131462","journal-title":"Proc IEEE"},{"key":"9034_CR5","unstructured":"Chen SS, Gopalakrishnan PS (1998) Speaker, environment and channel change detection and clustering via the Bayesian information criterion. In: Proceedings of the DARPA broadcast news transcription and understanding workshop, pp 127\u2013132"},{"key":"9034_CR6","unstructured":"Cheng S-S, Wang H-M (2003) A sequential metric-based audio segmentation method via the Bayesian information criterion. In: EUROSPEECH-2003, pp 945\u2013948"},{"key":"9034_CR8","doi-asserted-by":"crossref","unstructured":"Class F, Kaltenmeier A, Regel-Brietzmann P (1993) Optimization of an HMM-based continuous speech recognizer. In: EUROSPEECH-1993, pp 803\u2013806","DOI":"10.21437\/Eurospeech.1993-142"},{"key":"9034_CR7","unstructured":"Class F, Haiber U, Kaltenmeier A (2003) Automatic detection of change in speaker in speaker adaptive speech recognition systems. US patent application 2003\/0187645 A1"},{"issue":"1","key":"9034_CR9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster AP, Laird NM, Rubin DB (1977) Maximum likelihood from incomplete data via the EM algorithm. J R Stat Soc Ser B 39(1):1\u201338","journal-title":"J R Stat Soc Ser B"},{"key":"9034_CR10","doi-asserted-by":"crossref","unstructured":"Dobler S, R\u00fchl H-W (1995) Speaker adaptation for telephone based speech dialogue systems. In: EUROSPEECH-1995, pp 1139\u20131143","DOI":"10.21437\/Eurospeech.1995-285"},{"key":"9034_CR11","volume-title":"Pattern classification","author":"RO Duda","year":"2001","unstructured":"Duda RO, Hart PE, Stork DG (2001) Pattern classification. 2nd edn. Wiley-Interscience, New York","edition":"2"},{"key":"9034_CR12","unstructured":"Eatock JP, Mason JS (1994) A quantitative assessment of the relative speaker discriminating properties of phonemes. In: IEEE international conference on acoustics, speech, and signal processing, ICASSP-1994, vol 1, pp 133\u2013136"},{"key":"9034_CR13","doi-asserted-by":"crossref","unstructured":"Espi M, Miyabe S, Nishimoto T, Ono N, Sagayama S (2010) Analysis on speech characteristics for robust voice activity detection. In: IEEE workshop on spoken language technology, SLT-2010, pp 139\u2013144","DOI":"10.1109\/SLT.2010.5700838"},{"key":"9034_CR14","doi-asserted-by":"crossref","unstructured":"Fink GA (2003) Mustererkennung mit Markov-Modellen: Theorie-Praxis-Anwendungsgebiete. Leitf\u00e4den der Informatik. B. G. Teubner, Stuttgart (in German)","DOI":"10.1007\/978-3-322-80065-7"},{"key":"9034_CR15","doi-asserted-by":"crossref","unstructured":"Fortuna J, Sivakumaran P, Ariyaeeinia A, Malegaonkar A (2005) Open-set speaker identification using adapted Gaussian mixture models. In: INTERSPEECH-2005, pp 1997\u20132000","DOI":"10.21437\/Interspeech.2005-627"},{"key":"9034_CR16","doi-asserted-by":"crossref","unstructured":"Furui S (2009) Selected topics from 40\u00a0years of research in speech and speaker recognition. In: INTERSPEECH-2009, pp 1\u20138","DOI":"10.21437\/Interspeech.2009-1"},{"issue":"2","key":"9034_CR17","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1109\/89.279278","volume":"2","author":"J-L Gauvain","year":"1994","unstructured":"Gauvain J-L, Lee C-H (1994) Maximum a posteriori estimation for multivariate Gaussian mixture observations of Markov chains. IEEE Trans Speech Audio Process 2(2):291\u2013298","journal-title":"IEEE Trans Speech Audio Process"},{"key":"9034_CR18","doi-asserted-by":"crossref","unstructured":"Geiger J, Wallhoff F, Rigoll G (2010) GMM-UBM based open-set online speaker diarization. In: INTERSPEECH-2010, pp 2330\u20132333","DOI":"10.21437\/Interspeech.2010-638"},{"key":"9034_CR19","unstructured":"Gutman D, Bistritz Y (2002) Speaker verification using phoneme-adapted Gaussian mixture models. In: The XI European signal processing conference, EUSIPCO-2002, vol 3, pp 85\u201388"},{"key":"9034_CR20","unstructured":"Hain T, Johnson SE, Tuerk A, Woodland PC, Young SJ (1998) Segment generation and clustering in the HTK broadcast news transcription system. In: Proceedings of the broadcast news transcription and understanding workshop, pp 133\u2013137"},{"key":"9034_CR21","doi-asserted-by":"crossref","unstructured":"Harrag A, Mohamadi T, Serignat JF (2005) LDA combination of pitch and MFCC features in speaker recognition. In: IEEE Indicon conference, pp 237\u2013240","DOI":"10.1109\/INDCON.2005.1590163"},{"key":"9034_CR22","doi-asserted-by":"crossref","unstructured":"Herbig T, Gerl F, Minker W (2010a) Detection of unknown speakers in an unsupervised speech controlled system. In: Lee GG, Mariani J, Minker W, Nakamura S (eds) Spoken dialogue systems for ambient environments: second international workshop on spoken dialogue systems technology, IWSDS-2010. Lecture notes in computer science, vol 6392. Springer, Heidelberg, pp 25\u201335","DOI":"10.1007\/978-3-642-16202-2_3"},{"key":"9034_CR23","doi-asserted-by":"crossref","unstructured":"Herbig T, Gerl F, Minker W (2010b) Evaluation of two approaches for speaker specific speech recognition. In: Lee GG, Mariani J, Minker W, Nakamura S (eds) Spoken dialogue systems for ambient environments: second international workshop on spoken dialogue systems technology, IWSDS-2010. Lecture notes in computer science, vol 6392. Springer, Heidelberg, pp 36\u201347","DOI":"10.1007\/978-3-642-16202-2_4"},{"key":"9034_CR24","doi-asserted-by":"crossref","unstructured":"Herbig T, Gerl F, Minker W (2010c) Fast adaptation of speech and speaker characteristics for enhanced speech recognition in adverse intelligent environments. In: The 6th international conference on intelligent environments, IE-2010, pp 100\u2013105","DOI":"10.1109\/IE.2010.26"},{"key":"9034_CR25","doi-asserted-by":"crossref","unstructured":"Herbig T, Gerl F, Minker W (2010d) Simultaneous speech recognition and speaker identification. In: IEEE workshop on spoken language technology, SLT-2010, pp 206\u2013210","DOI":"10.1109\/SLT.2010.5700854"},{"key":"9034_CR26","doi-asserted-by":"crossref","unstructured":"Herbig T, Gerl F, Minker W (2010e) Speaker tracking in an unsupervised speech controlled system. In: INTERSPEECH-2010, pp 2666\u20132669","DOI":"10.21437\/Interspeech.2010-707"},{"key":"9034_CR27","doi-asserted-by":"crossref","unstructured":"Herbig T, Gerl F, Minker W (2011a) Evolution of an adaptive unsupervised speech controlled system. In: IEEE workshop on evolving and adaptive intelligent systems, EAIS-2011, pp 163\u2013169","DOI":"10.1109\/EAIS.2011.5945906"},{"key":"9034_CR28","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-19899-1","volume-title":"Self-learning speaker identification: a system for enhanced speech recognition. Signals and communication technology","author":"T Herbig","year":"2011","unstructured":"Herbig T, Gerl F, Minker W (2011) Self-learning speaker identification: a system for enhanced speech recognition. Signals and communication technology. Springer, Heidelberg"},{"key":"9034_CR29","unstructured":"Iskra D, Grosskopf B, Marasek K, van den Heuvel H, Diehl F, Kiessling A (2002) SPEECON-speech databases for consumer devices: database specification and validation. In: Proceedings of the third international conference on language resources and evaluation, LREC-2002, pp 329\u2013333"},{"key":"9034_CR30","doi-asserted-by":"crossref","unstructured":"Johnson SE (1999) Who spoke when? Automatic segmentation and clustering for determining speaker turns. In: EUROSPEECH-1999, vol 5, pp 2211\u20132214","DOI":"10.21437\/Eurospeech.1999-490"},{"key":"9034_CR31","volume-title":"Robust speech recognition in embedded systems and PC applications","author":"J-C Junqua","year":"2000","unstructured":"Junqua J-C (2000) Robust speech recognition in embedded systems and PC applications. Kluwer, Dordrecht"},{"issue":"6","key":"9034_CR32","doi-asserted-by":"crossref","first-page":"695","DOI":"10.1109\/89.876308","volume":"8","author":"R Kuhn","year":"2000","unstructured":"Kuhn R, Junqua J-C, Nguyen P, Niedzielski N (2000) Rapid speaker adaptation in eigenvoice space. IEEE Trans Speech Audio Process 8(6):695\u2013707","journal-title":"IEEE Trans Speech Audio Process"},{"key":"9034_CR34","doi-asserted-by":"crossref","unstructured":"Kwon S, Narayanan SS (2002) Speaker change detection using a new weighted distance measure. In: International conference on spoken language processing, ICSLP-2002, pp 2537\u20132540","DOI":"10.21437\/ICSLP.2002-660"},{"issue":"5","key":"9034_CR33","doi-asserted-by":"crossref","first-page":"1004","DOI":"10.1109\/TSA.2005.851981","volume":"13","author":"S Kwon","year":"2005","unstructured":"Kwon S, Narayanan S (2005) Unsupervised speaker indexing using generic models. IEEE Trans Speech Audio Process 13(5):1004\u20131013","journal-title":"IEEE Trans Speech Audio Process"},{"key":"9034_CR35","doi-asserted-by":"crossref","unstructured":"Lu L, Zhang HJ (2002) Speaker change detection and tracking in real-time news broadcasting analysis. In: Proceedings of the tenth ACM international conference on multimedia, MULTIMEDIA-2002, pp 602\u2013610","DOI":"10.1145\/641007.641127"},{"key":"9034_CR36","doi-asserted-by":"crossref","unstructured":"Meinedo H, Neto J (2003) Audio segmentation, classification and clustering in a broadcast news task. In: IEEE international conference on acoustics, speech, and signal processing, ICASSP-2003, vol 2, pp 5\u20138","DOI":"10.1109\/ICASSP.2003.1202280"},{"key":"9034_CR37","doi-asserted-by":"crossref","unstructured":"Mori K, Nakagawa S (2001) Speaker change detection and speaker clustering using VQ distortion for broadcast news speech recognition. In: IEEE international conference on acoustics, speech, and signal processing, ICASSP-2001, vol 1, pp 413\u2013416","DOI":"10.1109\/ICASSP.2001.940855"},{"issue":"3","key":"9034_CR38","doi-asserted-by":"crossref","first-page":"1058","DOI":"10.1093\/ietisy\/e89-d.3.1058","volume":"E89-D","author":"S Nakagawa","year":"2006","unstructured":"Nakagawa S, Zhang W, Takahashi M (2006) Text-independent\/text-prompted speaker recognition by combining speaker-specific GMM with speaker adapted syllable-based HMM. IEICE Trans Inf Syst E89-D(3):1058\u20131065","journal-title":"IEICE Trans Inf Syst"},{"key":"9034_CR39","doi-asserted-by":"crossref","unstructured":"Nishida M, Kawahara T (2004) Speaker indexing and adaptation using speaker clustering based on statistical model selection. In: IEEE international conference on acoustics, speech, and signal processing, ICASSP-2004, vol 1, pp 353\u2013356","DOI":"10.1109\/ICASSP.2004.1325995"},{"issue":"4","key":"9034_CR40","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1109\/TSA.2005.848890","volume":"13","author":"M Nishida","year":"2005","unstructured":"Nishida M, Kawahara T (2005) Speaker model selection based on the Bayesian information criterion applied to unsupervised speaker indexing. IEEE Trans Speech Audio Process 13(4):583\u2013592","journal-title":"IEEE Trans Speech Audio Process"},{"key":"9034_CR41","unstructured":"O\u2019Shaughnessy D (2000) Speech communications: human and machine, 2nd edn. IEEE Press, New York"},{"key":"9034_CR42","doi-asserted-by":"crossref","unstructured":"Park A, Hazen TJ (2002) ASR dependent techniques for speaker identification. In: International conference on spoken language processing, ICSLP-2002, pp 1337\u20131340","DOI":"10.21437\/ICSLP.2002-407"},{"key":"9034_CR43","doi-asserted-by":"crossref","unstructured":"Pelecanos J, Slomka S, Sridharan S (1999) Enhancing automatic speaker identification using phoneme clustering and frame based parameter and frame size selection. In: Proceedings of the fifth international symposium on signal processing and its applications, ISSPA-1999, vol 2, pp 633\u2013636","DOI":"10.1109\/ISSPA.1999.815752"},{"key":"9034_CR44","unstructured":"Rabiner L, Juang B-H (1993) Fundamentals of speech recognition. Prentice Hall, Englewood Cliffs"},{"key":"9034_CR45","unstructured":"Ram\u00edrez J, G\u00f3rriz JM, Segura JC (2007) Voice activity detection. Fundamentals and speech recognition system robustness. In: Grimm M, Kroschel K (eds) Robust speech recognition and understanding. I-Tech Education and Publishing, Vienna, pp 1\u201322"},{"issue":"3","key":"9034_CR46","doi-asserted-by":"crossref","first-page":"46","DOI":"10.1109\/97.372913","volume":"2","author":"DA Reynolds","year":"1995","unstructured":"Reynolds DA (1995) Large population speaker identification using clean and telephone speech. IEEE Signal Process Lett 2(3):46\u201348","journal-title":"IEEE Signal Process Lett"},{"key":"9034_CR47","unstructured":"Reynolds DA, Carlson BA (1995) Text-dependent speaker verification using decoupled and integrated speaker and speech recognizers. In: EUROSPEECH-1995, pp 647\u2013650"},{"issue":"1","key":"9034_CR49","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1109\/89.365379","volume":"3","author":"DA Reynolds","year":"1995","unstructured":"Reynolds DA, Rose RC (1995) Robust text-independent speaker identification using Gaussian mixture speaker models. IEEE Trans Speech Audio Process 3(1):72\u201383","journal-title":"IEEE Trans Speech Audio Process"},{"issue":"1\u20133","key":"9034_CR48","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds DA, Quatieri TF, Dunn RB (2000) Speaker verification using adapted Gaussian mixture models. Digital Signal Process 10(1\u20133):19\u201341","journal-title":"Digital Signal Process"},{"key":"9034_CR50","doi-asserted-by":"crossref","unstructured":"Rodr\u00edguez-Li\u00f1ares L, Garc\u00eda-Mateo C (1997) On the use of acoustic segmentation in speaker identification. In: EUROSPEECH-1997, pp 2315\u20132318","DOI":"10.21437\/Eurospeech.1997-609"},{"key":"9034_CR51","doi-asserted-by":"crossref","unstructured":"Schmalenstroeer J, Haeb-Umbach R (2007) Joint speaker segmentation, localization and identification for streaming audio. In: INTERSPEECH-2007, pp 570\u2013573","DOI":"10.21437\/Interspeech.2007-252"},{"key":"9034_CR52","unstructured":"Schukat-Talamazzini EG (1995) Automatische Spracherkennung. Vieweg, Braunschweig (in German)"},{"key":"9034_CR53","unstructured":"Siegler MA, Jain U, Raj B, Stern RM (1997) Automatic segmentation, classification and clustering of broadcast news audio. In: Proceedings of the DARPA speech recognition workshop, pp 97\u201399"},{"key":"9034_CR54","unstructured":"Thompson J, Mason JS (1993) Cepstral statistics within phonetic subgroups. In: International conference on signal processing, ICSP-1993, pp 737\u2013740"},{"key":"9034_CR55","doi-asserted-by":"crossref","unstructured":"Tritschler A, Gopinath R (1999) Improved speaker segmentation and segments clustering using the Bayesian information criterion. In: EUROSPEECH-1999, vol 2, pp 679\u2013682","DOI":"10.21437\/Eurospeech.1999-174x"},{"key":"9034_CR56","unstructured":"Wilcox L, Kimber D, Chen F (1994) Audio indexing using speaker identification. In: Proceedings of the SPIE conference on automatic systems for the inspection and identification of humans, vol 2277, pp 149\u2013157"},{"key":"9034_CR57","unstructured":"Wu T, Lu L, Chen K, Zhang H-J (2003) UBM-based real-time speaker segmentation for broad-casting news. In: IEEE international conference on acoustics, speech, and signal processing, ICASSP-2003, vol 2, pp 193\u2013196"},{"key":"9034_CR58","doi-asserted-by":"crossref","unstructured":"Yella SH, Varma V, Prahallad K (2010) Significance of anchor speaker segments for constructing extractive audio summaries of broadcast news. In: IEEE workshop on spoken language technology, SLT-2010, pp 13\u201318","DOI":"10.1109\/SLT.2010.5700815"},{"key":"9034_CR59","unstructured":"Yin S-C, Rose R, Kenny P (2008) Adaptive score normalization for progressive model adaptation in text independent speaker verification. In: IEEE international conference on acoustics, speech and signal processing, ICASSP-2008, pp 4857\u20134860"},{"key":"9034_CR60","unstructured":"Zhang Z-P, Furui S, Ohtsuki K (2000) On-line incremental speaker adaptation with automatic speaker change detection. In: IEEE international conference of acoustics, speech, and signal processing, ICASSP-2000, vol 2, pp 961\u2013964"},{"key":"9034_CR61","doi-asserted-by":"crossref","unstructured":"Zhou B, Hansen JHL (2000) Unsupervised audio stream segmentation and clustering via the Bayesian information criterion. In: International conference on spoken language processing, ICSLP-2000, vol 3, pp 714\u2013717","DOI":"10.21437\/ICSLP.2000-635"},{"key":"9034_CR62","doi-asserted-by":"crossref","unstructured":"Zhu X, Barras C, Meignier S, Gauvain J-L (2005) Combining speaker identification and BIC for speaker diarization. In: INTERSPEECH-2005, pp 2441\u20132444","DOI":"10.21437\/Interspeech.2005-651"},{"key":"9034_CR63","doi-asserted-by":"crossref","unstructured":"Zochov\u00e1 P, Radov\u00e1 V (2005) Modified DISTBIC algorithm for speaker change detection. In: INTERSPEECH-2005, pp 3073\u20133076","DOI":"10.21437\/Interspeech.2005-659"}],"container-title":["Evolving Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12530-011-9034-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12530-011-9034-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12530-011-9034-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,6]],"date-time":"2025-03-06T21:47:44Z","timestamp":1741297664000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12530-011-9034-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,7,3]]},"references-count":63,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2011,9]]}},"alternative-id":["9034"],"URL":"https:\/\/doi.org\/10.1007\/s12530-011-9034-1","relation":{},"ISSN":["1868-6478","1868-6486"],"issn-type":[{"value":"1868-6478","type":"print"},{"value":"1868-6486","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,7,3]]}}}