{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T13:04:46Z","timestamp":1742994286686,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031702587"},{"type":"electronic","value":"9783031702594"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70259-4_21","type":"book-chapter","created":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:02:15Z","timestamp":1725786135000},"page":"273-283","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Recent Methods and Algorithms in Speech Segmentation Tasks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4975-6493","authenticated-orcid":false,"given":"Dina","family":"Oralbekova","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8318-3794","authenticated-orcid":false,"given":"Orken","family":"Mamyrbayev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1178-6400","authenticated-orcid":false,"given":"Turdybek","family":"Kurmetkan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2153-7620","authenticated-orcid":false,"given":"Nurdaulet","family":"Zhumazhan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"21_CR1","doi-asserted-by":"publisher","unstructured":"Tranter, S.E., Reynolds, D.A.: An overview of automatic speaker diarization systems. IEEE Trans. Audio Speech Lang. Process. 14(5), 1557\u20131565 (2006). https:\/\/doi.org\/10.1109\/TASL.2006.878256","DOI":"10.1109\/TASL.2006.878256"},{"key":"21_CR2","doi-asserted-by":"publisher","unstructured":"Anguera, X., Bozonnet, S., Evans, N., Fredouille, C., Friedland, G., Vinyals, O.: Speaker diarization: a review of recent research. IEEE Trans. Audio Speech Language Process. 20, 356\u2013370 (2012). https:\/\/doi.org\/10.1109\/TASL.2011.2125954","DOI":"10.1109\/TASL.2011.2125954"},{"issue":"10","key":"21_CR3","doi-asserted-by":"publisher","first-page":"1065","DOI":"10.1016\/j.specom.2012.05.002","volume":"54","author":"M Moattar","year":"2012","unstructured":"Moattar, M., Homayounpour, M.: A review on speaker diarization systems and approaches. Speech Commun. 54(10), 1065\u20131103 (2012)","journal-title":"Speech Commun."},{"key":"21_CR4","doi-asserted-by":"publisher","unstructured":"Basu, J., et al.: An overview of speaker diarization: approaches, resources and challenges. In: 2016 Conference of the Oriental Chapter of International Committee for Coordination and Standardization of Speech Databases and Assessment Techniques (O-COCOSDA), Bali, Indonesia, pp. 166\u2013171 (2016). https:\/\/doi.org\/10.1109\/ICSDA.2016.7919005","DOI":"10.1109\/ICSDA.2016.7919005"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Park, T.J., Kanda, N., Dimitriadis, D., Han, K.J., Watanabe, S., Narayanan, S.S.: A review of speaker diarization: recent advances with deep learning. arXiv:abs\/2101.09624 (2021)","DOI":"10.1016\/j.csl.2021.101317"},{"key":"21_CR6","doi-asserted-by":"publisher","first-page":"650","DOI":"10.1137\/18M1174076","volume":"12","author":"M Pereyra","year":"2016","unstructured":"Pereyra, M.: Revisiting maximum-a-posteriori estimation in log-concave models. SIAM J. Imaging Sci. 12, 650\u2013670 (2016)","journal-title":"SIAM J. Imaging Sci."},{"key":"21_CR7","doi-asserted-by":"publisher","unstructured":"Nogales, R.E., Benalc\u00e1zar, M.E.: Analysis and evaluation of feature selection and feature extraction methods. Int. J. Comput. Intell. Syst. 16, 153 (2023). https:\/\/doi.org\/10.1007\/s44196-023-00319-1","DOI":"10.1007\/s44196-023-00319-1"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Prabakaran, D., Sriuppili, S.: Speech processing: MFCC based feature extraction techniques- an investigation. J. Phys. Conf. Ser. 1717 (2021)","DOI":"10.1088\/1742-6596\/1717\/1\/012009"},{"key":"21_CR9","doi-asserted-by":"publisher","unstructured":"Weng, Z., Li, L., Guo, D.: Speaker recognition using weighted dynamic MFCC based on GMM. In: 2010 International Conference on Anti-Counterfeiting, Security and Identification, Chengdu, China, pp. 285\u2013288 (2010). https:\/\/doi.org\/10.1109\/ICASID.2010.5551341","DOI":"10.1109\/ICASID.2010.5551341"},{"key":"21_CR10","doi-asserted-by":"publisher","unstructured":"Rahulamathavan, S., Yao, X., Yogachandran, R., Cumanan, K., Rajarajan, M.: Redesign of Gaussian mixture model for efficient and privacy-preserving speaker recognition. In: 2018 International Conference on Cyber Situational Awareness, Data Analytics and Assessment (Cyber SA), pp. 1\u20138 (2018). https:\/\/doi.org\/10.1109\/CyberSA.2018.8551477","DOI":"10.1109\/CyberSA.2018.8551477"},{"key":"21_CR11","doi-asserted-by":"publisher","unstructured":"Ibrahim, N.S., Ramli, D.A.: I-vector extraction for speaker recognition based on dimensionality reduction. Procedia Comput. Sci. 126, 1534\u20131540 (2018). https:\/\/doi.org\/10.1016\/j.procs.2018.08.126","DOI":"10.1016\/j.procs.2018.08.126"},{"key":"21_CR12","doi-asserted-by":"publisher","unstructured":"Teimoori, F., Razzazi, F.: Incomplete-data-driven speaker segmentation for diarization application; a help-training approach. Circuits Syst. Signal Process, 38, 2489\u20132522 (2019). https:\/\/doi.org\/10.1007\/s00034-018-0974-6","DOI":"10.1007\/s00034-018-0974-6"},{"key":"21_CR13","doi-asserted-by":"publisher","unstructured":"Gupta, A., Purwar, A.: Analysis of clustering algorithms for Speaker Diarization using LSTM. In: 2022 1st International Conference on Informatics (ICI), Noida, India, pp. 19\u201324 (2022). https:\/\/doi.org\/10.1109\/ICI53355.2022.9786928","DOI":"10.1109\/ICI53355.2022.9786928"},{"key":"21_CR14","doi-asserted-by":"publisher","unstructured":"Singh, P., Ganapathy, S.: Deep self-supervised hierarchical clustering for speaker diarization (2020). arXiv:2008.03960v1, https:\/\/doi.org\/10.48550\/arXiv.2008.03960","DOI":"10.48550\/arXiv.2008.03960"},{"key":"21_CR15","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1016\/j.ins.2022.11.139","volume":"622","author":"AM Ikotun","year":"2023","unstructured":"Ikotun, A.M., Ezugwu, A.E., Abualigah, L., Abuhaija, B., Heming, J.: K-means clustering algorithms: a comprehensive review, variants analysis, and advances in the era of big data. Inf. Sci. 622, 178\u2013210 (2023)","journal-title":"Inf. Sci."},{"key":"21_CR16","doi-asserted-by":"publisher","unstructured":"Mtibaa, A., Petrovska-Delacr\u00e9taz, D., Boudy, J., Hamida, A.: Privacy\u2010preserving speaker verification system based on binary I\u2010vectors. IET Biometrics (2021). https:\/\/doi.org\/10.1049\/bme2.12013","DOI":"10.1049\/bme2.12013"},{"key":"21_CR17","doi-asserted-by":"publisher","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., Khudanpur, S.: X-Vectors: robust DNN embeddings for speaker recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Calgary, AB, Canada, pp. 5329\u20135333 (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8461375","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Neururer, D., Dellwo, V., Stadelmann, T.: Deep neural networks for automatic speaker recognition do not learn supra-segmental temporal features. arXiv:abs\/2311.00489 (2023)","DOI":"10.1016\/j.patrec.2024.03.016"},{"key":"21_CR19","doi-asserted-by":"publisher","unstructured":"Chakroun, R., Frikha, M.: A deep learning approach for text-independent speaker recognition with short utterances. Multimed Tools Appl. 82, 33111\u201333133 (2023). https:\/\/doi.org\/10.1007\/s11042-023-14942-9","DOI":"10.1007\/s11042-023-14942-9"},{"key":"21_CR20","doi-asserted-by":"publisher","unstructured":"Gao, M., Zhang, X.: Improved convolutional neural network\u2013time-delay neural network structure with repeated feature fusions for speaker verification. Appl. Sci. 14, 3471 (2024). https:\/\/doi.org\/10.3390\/app14083471","DOI":"10.3390\/app14083471"},{"key":"21_CR21","doi-asserted-by":"publisher","unstructured":"Farsiani S., Izadkhah H., Lotfi S.: An optimum end-to-end text-independent speaker identification system using convolutional neural network. Comput. Electr. Eng. 100, ISSN 0045-7906, https:\/\/doi.org\/10.1016\/j.compeleceng.2022.107882 (2022)","DOI":"10.1016\/j.compeleceng.2022.107882"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Mamyrbayev, O., Kydyrbekova, A., Alimhan, K., Oralbekova, D., Zhumazhanov, B., Nuranbayeva, B.: Development of security systems using DNN and i & x-vector classifiers. Eastern-Europ. J. Enterp. Technol. 4(9 (112)), pp. 32\u201345 (2021)","DOI":"10.15587\/1729-4061.2021.239186"},{"key":"21_CR23","doi-asserted-by":"publisher","unstructured":"Oralbekova, D., Mamyrbayev, O., Othman, M., Kassymova, D., Mukhsina, K.: Contemporary approaches in evolving language models. Appl. Sci. 13(23), 12901 (2023). https:\/\/doi.org\/10.3390\/app132312901","DOI":"10.3390\/app132312901"},{"key":"21_CR24","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems (NIPS'17), Curran Associates Inc., Red Hook, NY, USA, pp. 6000\u20136010 (2017)"},{"key":"21_CR25","doi-asserted-by":"publisher","unstructured":"Mamyrbayev, O., Alimhan, K., Oralbekova, D., Bekarystankyzy, A., Zhumazhanov, B.: Identifying the influence of transfer learning method in developing an end-to-end automatic speech recognition system with a low data level. Eastern-Eur. J. Enterp. Technol. 19(115), 84\u201392 (2022). https:\/\/doi.org\/10.15587\/1729-4061.2022.252801","DOI":"10.15587\/1729-4061.2022.252801"},{"key":"21_CR26","doi-asserted-by":"publisher","unstructured":"Lai, Y., Tang, X., Fu, Y., Fang, R.: End-to-end speaker diarization with transformer. arXiv:2112.07463 (2021). https:\/\/doi.org\/10.48550\/arXiv.2112.07463","DOI":"10.48550\/arXiv.2112.07463"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Xia, W., Lu, H., Wang, Q., Tripath, A., L\u00f3pez-Moreno, I., Sak, H.: Turn-to-diarize: online speaker diarization constrained by transformer transducer speaker turn detection. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.8077\u20138081 (2022)","DOI":"10.1109\/ICASSP43922.2022.9746531"}],"container-title":["Communications in Computer and Information Science","Advances in Computational Collective Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70259-4_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:18:00Z","timestamp":1725787080000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70259-4_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031702587","9783031702594"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70259-4_21","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"9 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ICCCI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Collective Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Leipzig","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccci2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iccci.pwr.edu.pl\/2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}