{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T16:39:51Z","timestamp":1775234391714,"version":"3.50.1"},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,8,4]],"date-time":"2022-08-04T00:00:00Z","timestamp":1659571200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,4]],"date-time":"2022-08-04T00:00:00Z","timestamp":1659571200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1007\/s10772-022-09990-9","type":"journal-article","created":{"date-parts":[[2022,8,4]],"date-time":"2022-08-04T16:02:50Z","timestamp":1659628970000},"page":"933-945","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Robust acoustic domain identification with its application to speaker diarization"],"prefix":"10.1007","volume":"25","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0616-7608","authenticated-orcid":false,"given":"A Kishore","family":"Kumar","sequence":"first","affiliation":[]},{"given":"Shefali","family":"Waldekar","sequence":"additional","affiliation":[]},{"given":"Md","family":"Sahidullah","sequence":"additional","affiliation":[]},{"given":"Goutam","family":"Saha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,4]]},"reference":[{"issue":"2","key":"9990_CR1","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1109\/TASL.2011.2125954","volume":"20","author":"X Anguera","year":"2012","unstructured":"Anguera, X., 
Bozonnet, S., Evans, N., Fredouille, C., Friedland, G., & Vinyals, O. (2012). Speaker diarization: A review of recent research. IEEE Transactions on Audio, Speech, and Language Processing, 20(2), 356\u2013370.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9990_CR2","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., & Zisserman, A. (2017). Look, listen and learn. In Proceedings of the IEEE international conference on computer vision (pp. 609\u2013617).","DOI":"10.1109\/ICCV.2017.73"},{"key":"9990_CR3","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/0-387-21575-1_5","volume":"18","author":"P Assmann","year":"2004","unstructured":"Assmann, P., & Summerfield, Q. (2004). The perception of speech under adverse conditions. Speech Processing in the Auditory System, 18, 231\u2013308.","journal-title":"Speech Processing in the Auditory System"},{"key":"9990_CR4","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/j.neunet.2021.03.004","volume":"140","author":"Z Bai","year":"2021","unstructured":"Bai, Z., & Zhang, X.-L. (2021). Speaker recognition based on deep learning: An overview. Neural Networks, 140, 65\u201399.","journal-title":"Neural Networks"},{"issue":"3","key":"9990_CR5","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1109\/MSP.2014.2326181","volume":"32","author":"D Barchiesi","year":"2015","unstructured":"Barchiesi, D., Giannoulis, D., Stowell, D., & Plumbley, M. D. (2015). Acoustic scene classification: Classifying environments from the sounds they produce. IEEE Signal Processing Magazine, 32(3), 16\u201334.","journal-title":"IEEE Signal Processing Magazine"},{"issue":"6","key":"9990_CR6","doi-asserted-by":"publisher","first-page":"1216","DOI":"10.1109\/TASLP.2017.2690570","volume":"25","author":"V Bisot","year":"2017","unstructured":"Bisot, V., Serizel, R., Essid, S., & Richard, G. (2017). 
Feature learning with matrix factorization applied to acoustic scene classification. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 25(6), 1216\u20131229.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9990_CR7","unstructured":"Chen, H., Liu, Z., Liu, Z., Zhang, P., & Yan, Y. (2019). Integrating the data augmentation scheme with various classifiers for acoustic scene modeling. Technical report, DCASE2019 Challenge"},{"key":"9990_CR8","doi-asserted-by":"crossref","unstructured":"Chung, J. S., Nagrani, A., & Zisserman, A. (2018). VoxCeleb2: Deep speaker recognition. In Proceedings of INTERSPEECH, ISCA, (pp. 1086\u20131090).","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"9990_CR9","doi-asserted-by":"crossref","unstructured":"Cramer, J., Wu, H.-H., Salamon, J., & Bello, J. P. (2019). Look, listen, and learn more: Design choices for deep audio embeddings. In Proceedings of ICASSP (pp. 3852\u20133856). IEEE.","DOI":"10.1109\/ICASSP.2019.8682475"},{"issue":"4","key":"9990_CR10","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2010","unstructured":"Dehak, N., Kenny, P. J., Dehak, R., Dumouchel, P., & Ouellet, P. (2010). Front-end factor analysis for speaker verification. IEEE Transactions on Audio, Speech, and Language Processing, 19(4), 788\u2013798.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9990_CR11","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1109\/TASLP.2019.2955293","volume":"28","author":"M Diez","year":"2019","unstructured":"Diez, M., Burget, L., Landini, F., & \u010cernock\u1ef3, J. (2019). Analysis of speaker diarization based on Bayesian HMM with Eigenvoice priors. 
IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 28, 355\u2013368.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9990_CR12","unstructured":"Dorfer, M., Lehner, B., Eghbal-zadeh, H., Heindl, C., Paischer, F., & Widmer, G. (2018). Acoustic scene classification with fully convolutional neural networks and i-vectors. Technical report, DCASE2018 Challenge"},{"key":"9990_CR13","unstructured":"Eghbal-Zadeh, H., Lehner, B., Dorfer, M., & Widmer, G. (2016). CP-JKU submissions for DCASE-2016: A hybrid approach using binaural i-vectors and deep convolutional neural networks. Technical report, DCASE2016 Challenge"},{"key":"9990_CR14","unstructured":"Fennir, T., Habib, F., & Macaire, C. (2020). Acoustic scene classification for speaker diarization. Technical report, Universit\u00e9 de Lorraine."},{"key":"9990_CR15","doi-asserted-by":"crossref","unstructured":"Garcia-Romero, D., Snyder, D., Sell, G., Povey, D., & McCree, A. (2017). Speaker diarization using deep neural network embeddings. In Proceedings of ICASSP, IEEE, 2017 (pp. 4930\u20134934).","DOI":"10.1109\/ICASSP.2017.7953094"},{"key":"9990_CR16","unstructured":"Giannoulis, D., Stowell, D., Benetos, E., Rossignol, M., Lagrange, M., & Plumbley, M. D. (2013). A database and challenge for acoustic scene classification and event detection. In Proceedings of EUSIPCO, IEEE, 2013 (pp. 1\u20135)."},{"issue":"8","key":"9990_CR17","doi-asserted-by":"publisher","first-page":"1590","DOI":"10.1109\/TASL.2008.2002085","volume":"16","author":"KJ Han","year":"2008","unstructured":"Han, K. J., Kim, S., & Narayanan, S. S. (2008). Strategies to improve the robustness of agglomerative hierarchical clustering under data source variation for speaker diarization. 
IEEE Transactions on Audio, Speech, and Language Processing, 16(8), 1590\u20131601.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9990_CR18","unstructured":"Heittola, T., Mesaros, A., & Virtanen, T. (2020). Acoustic scene classification in DCASE 2020 challenge: Generalization across devices and low complexity solutions. In Proceedings of the detection and classification of acoustic scenes and events 2020 workshop, Tokyo, Japan (pp. 56\u201360)."},{"key":"9990_CR19","doi-asserted-by":"crossref","unstructured":"Huijbregts, M., & Wooters, C. (2007). The blame game: Performance analysis of speaker diarization system components. In Proceedings of INTERSPEECH, ISCA, 2007, (pp. 1857\u20131860).","DOI":"10.21437\/Interspeech.2007-517"},{"issue":"4","key":"9990_CR20","doi-asserted-by":"publisher","first-page":"1435","DOI":"10.1109\/TASL.2006.881693","volume":"15","author":"P Kenny","year":"2007","unstructured":"Kenny, P., Boulianne, G., Ouellet, P., & Dumouchel, P. (2007). Joint factor analysis versus eigenchannels in speaker recognition. IEEE Transactions on Audio, Speech, and Language Processing, 15(4), 1435\u20131447.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9990_CR21","doi-asserted-by":"crossref","unstructured":"Kim, C., & Stern, R. M. (2008). Robust signal-to-noise ratio estimation based on waveform amplitude distribution analysis. In Proceedings of INTERSPEECH, ISCA, 2008 (pp. 2598\u20132601).","DOI":"10.21437\/Interspeech.2008-644"},{"key":"9990_CR22","doi-asserted-by":"crossref","unstructured":"Ko, T., Peddinti, V., Povey, D., Seltzer, M. L., & Khudanpur, S. (2017). A study on data augmentation of reverberant speech for robust speech recognition. In Proceedings of ICASSP, IEEE, 2017 (pp. 
5220\u20135224).","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"9990_CR23","doi-asserted-by":"crossref","unstructured":"Landini, F., Wang, S., Diez, M., Burget, L., Mat\u011bjka, P., \u017dmol\u00edkov\u00e1, K., Mo\u0161ner, L., Silnova, A., Plchot, O., Novotn\u00fd, O., Zeinali, H., & Rohdin, J. (2020). BUT system for the second Dihard Speech Diarization challenge. In Proceedings of ICASSP 2020, IEEE (pp. 6529\u20136533).","DOI":"10.1109\/ICASSP40776.2020.9054251"},{"issue":"3\u20134","key":"9990_CR24","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1016\/S0095-4470(19)30692-8","volume":"14","author":"A L\u00f6fqvist","year":"1986","unstructured":"L\u00f6fqvist, A. (1986). The long-time-average spectrum as a tool in voice research. Journal of Phonetics, 14(3\u20134), 471\u2013475.","journal-title":"Journal of Phonetics"},{"key":"9990_CR25","doi-asserted-by":"crossref","unstructured":"Mesaros, A., Heittola, T., & Virtanen, T. (2016). TUT database for acoustic scene classification and sound event detection. In Proceedings of EUSIPCO, IEEE, 2016 (pp. 1128\u20131132).","DOI":"10.1109\/EUSIPCO.2016.7760424"},{"key":"9990_CR26","doi-asserted-by":"crossref","unstructured":"Mesaros, A., Heittola, T., & Virtanen, T. (2018a). Acoustic scene classification: An overview of DCASE 2017 challenge entries. In Proceedings of 2018, 16th International workshop on acoustic signal enhancement (IWAENC), IEEE (pp. 411\u2013415).","DOI":"10.1109\/IWAENC.2018.8521242"},{"key":"9990_CR27","unstructured":"Mesaros, A., Heittola, T., & Virtanen, T. (2018b). A multi-device dataset for urban acoustic scene classification. In Proceedings of the Detection and classification of acoustic scenes and events 2018 Workshop (pp. 9\u201313)."},{"key":"9990_CR28","doi-asserted-by":"crossref","unstructured":"Mesaros, A., Heittola, T., & Virtanen, T. (2019). Acoustic scene classification in DCASE 2019 challenge: Closed and open set classification and data mismatch setups. 
In Proceedings of the detection and classification of acoustic scenes and events 2019 Workshop Oct. 2019, (pp. 164\u2013168).","DOI":"10.33682\/m5kp-fa97"},{"key":"9990_CR29","doi-asserted-by":"crossref","unstructured":"Mirghafori, N., & Wooters, C. (2006). Nuts and flakes: A study of data characteristics in speaker diarization. In Proceedings of ICASSP (Vol. 1). IEEE.","DOI":"10.1109\/ICASSP.2006.1660196"},{"key":"9990_CR30","unstructured":"Mun, S., Park, S., Han, D., & Ko, H. (2017). Generative adversarial network based acoustic scene training set augmentation and selection using SVM hyper-plane. Technical report, DCASE2017 Challenge."},{"key":"9990_CR31","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J. S., & Zisserman, A. (2017). VoxCeleb: A large-scale speaker identification dataset. In Proceedings of INTERSPEECH, ISCA (pp. 2616\u20132620).","DOI":"10.21437\/Interspeech.2017-950"},{"key":"9990_CR32","doi-asserted-by":"publisher","first-page":"101317","DOI":"10.1016\/j.csl.2021.101317","volume":"72","author":"TJ Park","year":"2022","unstructured":"Park, T. J., Kanda, N., Dimitriadis, D., Han, K. J., Watanabe, S., & Narayanan, S. (2022). A review of speaker diarization: Recent advances with deep learning. Computer Speech & Language, 72, 101317.","journal-title":"Computer Speech & Language"},{"key":"9990_CR33","doi-asserted-by":"crossref","unstructured":"Prince, S. J., & Elder, J. H. (2007). Probabilistic linear discriminant analysis for inferences about identity. In Proceedings of the IEEE International Conference on Computer Vision (pp. 1\u20138).","DOI":"10.1109\/ICCV.2007.4409052"},{"key":"9990_CR34","doi-asserted-by":"crossref","unstructured":"Raj, D., Snyder, D., Povey, D. & Khudanpur, S. (2019). Probing the information encoded in x-vectors. In Proceedings of the IEEE ASRU (pp. 
726\u2013733).","DOI":"10.1109\/ASRU46091.2019.9003979"},{"issue":"1","key":"9990_CR35","first-page":"142","volume":"23","author":"A Rakotomamonjy","year":"2015","unstructured":"Rakotomamonjy, A., & Gasso, G. (2015). Histogram of gradients of time-frequency representations for audio scene classification. IEEE\/ACM Transactions on Audio, Speech and Language Processing, 23(1), 142\u2013153.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing"},{"issue":"3","key":"9990_CR36","doi-asserted-by":"publisher","first-page":"662","DOI":"10.1109\/JAS.2018.7511066","volume":"5","author":"Z Ren","year":"2018","unstructured":"Ren, Z., Qian, K., Zhang, Z., Pandit, V., Baird, A., & Schuller, B. (2018). Deep scalogram representations for acoustic scene classification. IEEE\/CAA Journal of Automatica Sinica, 5(3), 662\u2013669.","journal-title":"IEEE\/CAA Journal of Automatica Sinica"},{"issue":"1\u20133","key":"9990_CR37","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds, D. A., Quatieri, T. F., & Dunn, R. B. (2000). Speaker verification using adapted Gaussian mixture models. Digital Signal Processing, 10(1\u20133), 19\u201341.","journal-title":"Digital Signal Processing"},{"key":"9990_CR38","doi-asserted-by":"crossref","unstructured":"Roma, G., Nogueira, W. & Herrera, P. (2013). Recurrence quantification analysis features for auditory scene classification. Technical report, DCASE2013 Challenge","DOI":"10.1109\/WASPAA.2013.6701890"},{"key":"9990_CR39","unstructured":"Ryant, N., Church, K., Cieri, C., Cristia, A., Du, J., Ganapathy, S. & Liberman, M. (2018). First DIHARD challenge evaluation plan. Technical report, DIHARD challenge."},{"key":"9990_CR40","doi-asserted-by":"crossref","unstructured":"Ryant, N., Church, K., Cieri, C., Cristia, A., Du, J., Ganapathy, S., & Liberman, M. (2019). 
The second DIHARD diarization challenge: Dataset, task, and baselines. In Proceedings of INTERSPEECH 2019, ISCA, (pp. 978\u2013982).","DOI":"10.21437\/Interspeech.2019-1268"},{"key":"9990_CR41","unstructured":"Ryant, N., Church, K., Cieri, C., Du, J., Ganapathy, S. & Liberman, M. (2020). Third DIHARD challenge evaluation plan. Technical report, Linguistic Data Consortium, University of Pennsylvania, Philadelphia, PA"},{"key":"9990_CR42","doi-asserted-by":"crossref","unstructured":"Ryant, N., Singh, P., Krishnamohan, V., Varma, R., Church, K., Cieri, C., Du, J., Ganapathy, S., & Liberman, M. (2021). The third DIHARD diarization challenge. In Proceedings of INTERSPEECH, ISCA (pp. 3570\u20133574).","DOI":"10.21437\/Interspeech.2021-1208"},{"key":"9990_CR43","unstructured":"Sahidullah, M., Patino, J., Cornell, S., Yin, R., Sivasankaran, S., Bredin, H., Korshunov, P., Brutti, A., Serizel, R., Vincent, E., Evans, N., Marcel, S., Squartini, S., & Barras, C. (2019). The Speed submission to DIHARD II: Contributions & lessons learned. arXiv:1911.02388"},{"key":"9990_CR44","unstructured":"Sakashita, Y., & Aono, M. (2018). Acoustic scene classification by ensemble of spectrograms based on adaptive temporal divisions. Technical report, DCASE2018 Challenge"},{"key":"9990_CR45","doi-asserted-by":"crossref","unstructured":"Sell, G., & Garcia-Romero, D. (2014). Speaker diarization with PLDA i-vector scoring and unsupervised calibration. In Proceedings of the 2014 IEEE spoken language technology workshop (SLT), IEEE (pp. 413\u2013417).","DOI":"10.1109\/SLT.2014.7078610"},{"key":"9990_CR46","doi-asserted-by":"crossref","unstructured":"Sell, G., Snyder, D., McCree, A., Garcia-Romero, D., Villalba, J., Maciejewski, M., Manohar, V., Dehak, N., Povey, D., Watanabe, S., & Khudanpur, S. (2018). Diarization is hard: Some experiences and lessons learned for the JHU team in the inaugural DIHARD challenge. In Proceedings of INTERSPEECH, ISCA (pp. 
2808\u20132812).","DOI":"10.21437\/Interspeech.2018-1893"},{"issue":"10","key":"9990_CR47","doi-asserted-by":"publisher","first-page":"2015","DOI":"10.1109\/TASL.2013.2264673","volume":"21","author":"SH Shum","year":"2013","unstructured":"Shum, S. H., Dehak, N., Dehak, R., & Glass, J. R. (2013). Unsupervised methods for speaker diarization: An integrated and iterative approach. IEEE Transactions on Audio, Speech, and Language Processing, 21(10), 2015\u20132028.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9990_CR48","doi-asserted-by":"crossref","unstructured":"Shum, S., Dehak, N., Chuangsuwanich, E., Reynolds, D., & Glass, J. (2011). Exploiting intra-conversation variability for speaker diarization. In Proceedings of INTERSPEECH, ISCA (pp. 945\u2013948).","DOI":"10.21437\/Interspeech.2011-383"},{"key":"9990_CR49","doi-asserted-by":"crossref","unstructured":"Sinclair, M., & King, S. (2013). Where are the challenges in speaker diarization? In Proceedings of the ICASSP, IEEE (pp. 7741\u20137745).","DOI":"10.1109\/ICASSP.2013.6639170"},{"key":"9990_CR50","doi-asserted-by":"crossref","unstructured":"Singh, P., Vardhan, H., Ganapathy, S., & Kanagasundaram, A. (2019). LEAP diarization system for the second DIHARD challenge. In Proceedings of INTERSPEECH, ISCA (pp. 983\u2013987).","DOI":"10.21437\/Interspeech.2019-2716"},{"key":"9990_CR51","unstructured":"Snyder, D., Chen, G., & Povey, D. (2015). MUSAN: A music, speech, and noise corpus. arXiv:1510.08484"},{"key":"9990_CR52","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Povey, D., & Khudanpur, S. (2017). Deep neural network embeddings for text-independent speaker verification. In Proceedings of INTERSPEECH, ISCA (pp. 999\u20131003).","DOI":"10.21437\/Interspeech.2017-620"},{"key":"9990_CR53","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., & Khudanpur, S. (2018). 
X-vectors: Robust DNN embeddings for speaker recognition. In Proceedings of ICASSP, IEEE, (pp. 5329\u20135333).","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"9990_CR54","unstructured":"Suh, S., Park, S., Jeong, Y., & Lee, T. (2020). Designing acoustic scene classification models with CNN variants. Technical report, DCASE2020 Challenge"},{"key":"9990_CR55","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/j.dsp.2017.12.012","volume":"75","author":"S Waldekar","year":"2018","unstructured":"Waldekar, S., & Saha, G. (2018). Classification of audio scenes with novel features in a fused system framework. Digital Signal Processing, 75, 71\u201382.","journal-title":"Digital Signal Processing"},{"issue":"11","key":"9990_CR56","doi-asserted-by":"publisher","first-page":"7911","DOI":"10.1007\/s11042-019-08279-5","volume":"79","author":"S Waldekar","year":"2020","unstructured":"Waldekar, S., & Saha, G. (2020). Analysis and classification of acoustic scenes with wavelet transform-based mel-scaled features. Multimedia Tools and Applications, 79(11), 7911\u20137926.","journal-title":"Multimedia Tools and Applications"},{"key":"9990_CR57","doi-asserted-by":"crossref","unstructured":"Wang, Q., Downey, C., Wan, L., Mansfield, P. A., & Moreno, I. L. (2018). Speaker diarization with LSTM. In Proceedings of ICASSP, IEEE (pp. 5239\u20135243).","DOI":"10.1109\/ICASSP.2018.8462628"},{"key":"9990_CR58","doi-asserted-by":"crossref","unstructured":"Wang, S., Qian, Y., & Yu, K. (2017). What does the speaker embedding encode? In Proceedings of INTERSPEECH, ISCA (pp. 1497\u20131501).","DOI":"10.21437\/Interspeech.2017-1125"},{"key":"9990_CR59","unstructured":"Wang, Y., He, M., Niu, S., Sun, L., Gao, T., Fang, X., Pan, J., Du, J., & Lee, C.-H. (2021). USTC-NELSLIP system description for DIHARD-III challenge. arXiv:2103.10661"},{"key":"9990_CR60","unstructured":"Zeinali, H., Burget, L., & Cernocky, J. H. (2018). 
Convolutional neural networks and x-vector embedding for DCASE2018 Acoustic Scene Classification challenge. In Proceedings of the detection and classification of acoustic scenes and events 2018 workshop (pp. 202\u2013206)."},{"key":"9990_CR61","doi-asserted-by":"crossref","unstructured":"Zhang, A., Wang, Q., Zhu, Z., Paisley, J., & Wang, C. (2019). Fully supervised speaker diarization. In Proceedings of ICASSP, IEEE (pp. 6301\u20136305).","DOI":"10.1109\/ICASSP.2019.8683892"},{"key":"9990_CR62","doi-asserted-by":"crossref","unstructured":"Zhu, W., & Pelecanos, J. (2016). Online speaker diarization using adapted i-vector transforms. In Proceedings of ICASSP, IEEE (pp. 5045\u20135049).","DOI":"10.1109\/ICASSP.2016.7472638"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09990-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-022-09990-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09990-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T11:18:09Z","timestamp":1670843889000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-022-09990-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,4]]},"references-count":62,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,12]]}},"alternative-id":["9990"],"URL":"https:\/\/doi.org\/10.1007\/s10772-022-09990-9","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"publis
hed":{"date-parts":[[2022,8,4]]},"assertion":[{"value":"15 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 July 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 August 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}