{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T12:29:34Z","timestamp":1743078574285,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031404979"},{"type":"electronic","value":"9783031404986"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-40498-6_23","type":"book-chapter","created":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T23:02:34Z","timestamp":1692745354000},"page":"258-269","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An Online Diarization Approach for\u00a0Streaming Applications Based on\u00a0Tree-Clustering and\u00a0Bayesian Resegmentation"],"prefix":"10.1007","author":[{"given":"Juan M.","family":"Mart\u00edn-Do\u00f1as","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haritz","family":"Arzelus","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aitor","family":"\u00c1lvarez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joaqu\u00edn","family":"Arellano","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,23]]},"reference":[{"key":"23_CR1","doi-asserted-by":"publisher","first-page":"101317","DOI":"10.1016\/j.csl.2021.101317","volume":"72","author":"T Park","year":"2022","unstructured":"Park, T., Kanda, N., Dimitriadis, D., Han, K., Watanabe, S., Narayanan, S.: A review of speaker diarization: recent advances with deep learning. Comput. Speech Lang. 72, 101317 (2022)","journal-title":"Comput. Speech Lang."},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., Khudanpur, S.: X-vectors: Robust DNN embeddings for speaker recognition. In: Proceedings of the ICASSP, pp. 5329\u20135333 (2018)","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Ryant, N., et al.: The second DIHARD diarization challenge: dataset, task, and baselines. In: Proceedings of the InterSpeech, pp. 978\u2013982 (2019)","DOI":"10.21437\/Interspeech.2019-1268"},{"key":"23_CR4","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1109\/LSP.2019.2961071","volume":"27","author":"T Park","year":"2019","unstructured":"Park, T., Han, K., Kumar, M., Narayanan, S.: Auto-tuning spectral clustering for speaker diarization using normalized maximum eigengap. IEEE Sign. Process. Lett. 27, 381\u2013385 (2019)","journal-title":"IEEE Sign. Process. Lett."},{"key":"23_CR5","doi-asserted-by":"publisher","first-page":"101254","DOI":"10.1016\/j.csl.2021.101254","volume":"71","author":"F Landini","year":"2022","unstructured":"Landini, F., Profant, J., Diez, M., Burget, L.: Bayesian HMM clustering of x-vector sequences (VBx) in speaker diarization: theory, implementation and analysis on standard tasks. Comput. Speech Lang. 71, 101254 (2022)","journal-title":"Comput. Speech Lang."},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Medennikov, I., et al.: Target-speaker voice activity detection: a novel approach for multi-speaker diarization in a dinner party scenario. In: Proceedings of the Interspeech, pp. 274\u2013278 (2020)","DOI":"10.21437\/Interspeech.2020-1602"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Maiti, S., Erdogan, H., Wilson, K., Wisdom, S., Watanabe, S., Hershey, J.R.: End-to-end diarization for variable number of speakers with local-global networks and discriminative speaker embeddings. In: Proceedings of the ICASSP, pp. 7183\u20137187 (2021)","DOI":"10.1109\/ICASSP39728.2021.9414841"},{"key":"23_CR8","doi-asserted-by":"crossref","unstructured":"Kinoshita, K., Delcroix, M., Tawara, N.: Integrating end-to-end neural and clustering-based diarization: getting the best of both worlds. In: Proceedings of the ICASSP, pp. 7198\u20137202 (2021)","DOI":"10.1109\/ICASSP39728.2021.9414333"},{"key":"23_CR9","doi-asserted-by":"publisher","first-page":"1493","DOI":"10.1109\/TASLP.2022.3162080","volume":"30","author":"S Horiguchi","year":"2022","unstructured":"Horiguchi, S., Fujita, Y., Watanabe, S., Xue, Y., Garcia, P.: Encoder-decoder based attractors for end-to-end neural diarization. IEEE\/ACM Trans. Audio, Speech, Lang. Process. 30, 1493\u20131507 (2022)","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Vi\u00f1als, I., Gimeno, P., Ortega, A., Miguel, A., Lleida, E.: ViVoLAB speaker diarization system for the DIHARD 2019 Challenge. In: Proceedings of the InterSpeech, pp. 988\u2013992 (2019)","DOI":"10.21437\/Interspeech.2019-2462"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Zhang, A., Wang, Q., Zhu, Z., Paisley, J., Wang, C.: Fully supervised speaker diarization. In: Proceedings of the ICASSP, pp. 6301\u20136305 (2019)","DOI":"10.1109\/ICASSP.2019.8683892"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Low-latency online speaker diarization with graph-based label generation. In: Proceedings of the Odyssey, pp. 162\u2013169 (2022)","DOI":"10.21437\/Odyssey.2022-23"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Yue, Y., Du, J., He, M., Yang, Y., Wang, R.: Online speaker diarization with core samples selection. In: Proceedings of the InterSpeech, pp. 1466\u20131470 (2022)","DOI":"10.21437\/Interspeech.2022-10363"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Xue, Y., Horiguchi, S., Fujita, Y., Watanabe, S., Garc\u00eda, P., Nagamatsu, K.: Online end-to-end neural diarization with speaker-tracing buffer. In: Proceedings of the IEEE SLT, pp. 841\u2013848 (2021)","DOI":"10.1109\/SLT48900.2021.9383523"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Xue, Y., et al.: Online streaming end-to-end neural diarization handling overlapping speech and flexible numbers of speakers. In: Proceedings of the InterSpeech, pp. 3116\u20133120 (2021)","DOI":"10.21437\/Interspeech.2021-708"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Wang, W., Lin, Q., Li, M.: Online target speaker voice activity detection for speaker diarization. In: Proceedings of the InterSpeech, pp. 1441\u20131445 (2022)","DOI":"10.21437\/Interspeech.2022-677"},{"issue":"18","key":"23_CR17","doi-asserted-by":"publisher","first-page":"8521","DOI":"10.3390\/app11188521","volume":"11","author":"I Vi\u00f1als","year":"2021","unstructured":"Vi\u00f1als, I., Ortega, A., Miguel, A., Lleida, E.: The domain mismatch problem in the broadcast speaker attribution task. Appl. Sci. 11(18), 8521 (2021)","journal-title":"Appl. Sci."},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Fini, E., Brutti, A.: Supervised online diarization with sample mean loss for multi-domain data. In: Proceedings of the ICASSP, pp. 7134\u20137138 (2020)","DOI":"10.1109\/ICASSP40776.2020.9053477"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Xia, W., et al.: Turn-to-diarize: online speaker diarization constrained by transformer transducer speaker turn detection. In: Proceedings of the ICASSP, pp. 8077\u20138081 (2022)","DOI":"10.1109\/ICASSP43922.2022.9746531"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Chen, Y., Guo, Y., Li, Q., Cheng, G., Zhang, P., Yan, Y.: Interrelate training and searching: a unified online clustering framework for speaker diarization. In: Proceedings of the InterSpeech, pp. 1456\u20131460 (2022)","DOI":"10.21437\/Interspeech.2022-944"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Coria, J., Bredin, H., Ghannay, S., Rosset, S.: Overlap-aware low-latency online speaker diarization based on end-to-end local segmentation. In: Proceedings of the IEEE ASRU, pp. 1139\u20131146 (2021)","DOI":"10.1109\/ASRU51503.2021.9688044"},{"key":"23_CR22","doi-asserted-by":"publisher","first-page":"706","DOI":"10.1109\/TASLP.2022.3233237","volume":"31","author":"S Horiguchi","year":"2023","unstructured":"Horiguchi, S., Watanabe, S., Garc\u00eda, P., Takashima, Y., Kawaguchi, Y.: Online neural diarization of unlimited numbers of speakers using global and local attractors. IEEE\/ACM Trans. Audio, Speech, Lang. Process. 31, 706\u2013720 (2023)","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"issue":"8","key":"23_CR23","first-page":"2461","volume":"12","author":"D Blei","year":"2011","unstructured":"Blei, D., Frazier, P.: Distance dependent Chinese restaurant processes. J. Mach. Learn. Res. 12(8), 2461\u20132488 (2011)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"23_CR24","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1109\/TIT.1971.1054572","volume":"17","author":"F Jelinek","year":"1971","unstructured":"Jelinek, F., Anderson, J.: Instrumentable tree encoding of information sources. IEEE Trans. Inf. Theory 17(1), 118\u2013119 (1971)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"23_CR25","unstructured":"Ortega, A., Vinals, I., Miguel, A., Lleida, E.: The Albayzin 2016 speaker diarization evaluation. Proc. IberSpeech (2016)"},{"issue":"4","key":"23_CR26","doi-asserted-by":"publisher","first-page":"1889","DOI":"10.3390\/app12041889","volume":"12","author":"A \u00c1lvarez","year":"2022","unstructured":"\u00c1lvarez, A., Arzelus, H., Torre, I., Gonz\u00e1lez-Docasal, A.: Evaluating novel speech transcription architectures on the Spanish RTVE2020 database. Appl. Sci. 12(4), 1889 (2022)","journal-title":"Appl. Sci."},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"23_CR28","doi-asserted-by":"publisher","first-page":"101027","DOI":"10.1016\/j.csl.2019.101027","volume":"60","author":"A Nagrani","year":"2020","unstructured":"Nagrani, A., Chung, J., Xie, W., Zisserman, A.: VoxCeleb: large-scale speaker verification in the wild. Comput. Speech Lang. 60, 101027 (2020)","journal-title":"Comput. Speech Lang."},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Fan, Y., et al.: CN-Celeb: a challenging Chinese speaker recognition dataset. In: Proceedings of the ICASSP, pp. 7604\u20137608 (2020)","DOI":"10.1109\/ICASSP40776.2020.9054017"},{"key":"23_CR30","doi-asserted-by":"publisher","first-page":"1542","DOI":"10.1109\/TASLP.2021.3073596","volume":"29","author":"H Dinkel","year":"2021","unstructured":"Dinkel, H., Wang, S., Xu, X., Wu, M., Yu, K.: Voice activity detection in the wild: a data-driven approach using teacher-student training. IEEE\/ACM Trans. Audio, Speech, Lang. Process. 29, 1542\u20131555 (2021)","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"issue":"24","key":"23_CR31","doi-asserted-by":"publisher","first-page":"5412","DOI":"10.3390\/app9245412","volume":"9","author":"E Lleida","year":"2019","unstructured":"Lleida, E., et al.: Albayzin 2018 evaluation: the IberSpeech-RTVE challenge on speech technologies for Spanish broadcast media. Appl. Sci. 9(24), 5412 (2019)","journal-title":"Appl. Sci."},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Bredin, H., et al.: Pyannote. audio: neural building blocks for speaker diarization. In: Proceedings of the ICASSP, pp. 7124\u20137128 (2020)","DOI":"10.1109\/ICASSP40776.2020.9052974"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-40498-6_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T23:05:57Z","timestamp":1692745557000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-40498-6_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031404979","9783031404986"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-40498-6_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"23 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pilsen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.kiv.zcu.cz\/tsd2023\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMS & back-office system","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"64","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"48% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.56","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}