{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T13:18:07Z","timestamp":1740143887459,"version":"3.37.3"},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T00:00:00Z","timestamp":1734652800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T00:00:00Z","timestamp":1734652800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"National Natural Science Foundation of China under Grants","award":["62001100"],"award-info":[{"award-number":["62001100"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J AUDIO SPEECH MUSIC PROC."],"DOI":"10.1186\/s13636-024-00385-z","type":"journal-article","created":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T05:04:14Z","timestamp":1734671054000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Domain-weighted transfer learning and discriminative embeddings for low-resource speaker verification"],"prefix":"10.1186","volume":"2024","author":[{"given":"Han","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingrui","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingjun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changzhi","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2329-895X","authenticated-orcid":false,"given":"Longting","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,20]]},"reference":[{"key":"385_CR1","doi-asserted-by":"publisher","unstructured":"G. Heigold, I. Moreno, S. Bengio, N. Shazeer, in 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). End-to-end text-dependent speaker verification (2016), pp. 5115\u20135119. https:\/\/doi.org\/10.1109\/ICASSP.2016.7472652","DOI":"10.1109\/ICASSP.2016.7472652"},{"key":"385_CR2","doi-asserted-by":"publisher","unstructured":"D. Snyder, P. Ghahremani, D. Povey, D. Garcia-Romero, Y. Carmiel, S. Khudanpur, in 2016 IEEE Spoken Language Technology Workshop (SLT). Deep neural network-based speaker embeddings for end-to-end speaker verification (2016), pp. 165\u2013170. https:\/\/doi.org\/10.1109\/SLT.2016.7846260","DOI":"10.1109\/SLT.2016.7846260"},{"key":"385_CR3","doi-asserted-by":"publisher","unstructured":"D. Snyder, D. Garcia-Romero, D. Povey, S. Khudanpur, in Proc. Interspeech 2017. Deep neural network embeddings for text-independent speaker verification (2017), pp. 999\u20131003. https:\/\/doi.org\/10.21437\/Interspeech.2017-620","DOI":"10.21437\/Interspeech.2017-620"},{"key":"385_CR4","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/j.neunet.2021.03.004","volume":"140","author":"Z Bai","year":"2021","unstructured":"Z. Bai, X.L. Zhang, Speaker recognition based on deep learning: An overview. Neural Netw. 140, 65\u201399 (2021). https:\/\/doi.org\/10.1016\/j.neunet.2021.03.004","journal-title":"Neural Netw."},{"key":"385_CR5","doi-asserted-by":"publisher","unstructured":"G. Bhattacharya, J. Alam, P. Kenny, in Proc. Interspeech 2017. Deep speaker embeddings for short-duration speaker verification (2017), pp. 1517\u20131521. https:\/\/doi.org\/10.21437\/Interspeech.2017-1575","DOI":"10.21437\/Interspeech.2017-1575"},{"issue":"4","key":"385_CR6","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"N. Dehak, P.J. Kenny, R. Dehak, P. Dumouchel, P. Ouellet, Front-end factor analysis for speaker verification. IEEE Trans. Audio Speech Lang. Process. 19(4), 788\u2013798 (2011). https:\/\/doi.org\/10.1109\/TASL.2010.2064307","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"385_CR7","doi-asserted-by":"publisher","unstructured":"D. Snyder, D. Garcia-Romero, G. Sell, D. Povey, S. Khudanpur, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). X-vectors: Robust DNN embeddings for speaker recognition (2018), pp. 5329\u20135333. https:\/\/doi.org\/10.1109\/ICASSP.2018.8461375","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"385_CR8","doi-asserted-by":"publisher","first-page":"101027","DOI":"10.1016\/j.csl.2019.101027","volume":"60","author":"A Nagrani","year":"2020","unstructured":"A. Nagrani, J.S. Chung, W. Xie, A. Zisserman, Voxceleb: Large-scale speaker verification in the wild. Comput. Speech Lang. 60, 101027 (2020). https:\/\/doi.org\/10.1016\/j.csl.2019.101027","journal-title":"Comput. Speech Lang."},{"key":"385_CR9","doi-asserted-by":"publisher","unstructured":"Y. Fan, J. Kang, L. Li, K. Li, H. Chen, S. Cheng, P. Zhang, Z. Zhou, Y. Cai, D. Wang, in 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). CN-celeb: A challenging chinese speaker recognition dataset (2020), pp. 7604\u20137608. https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054017","DOI":"10.1109\/ICASSP40776.2020.9054017"},{"key":"385_CR10","unstructured":"Y. Jia, Y. Zhang, R. Weiss, Q. Wang, J. Shen, F. Ren, P. Nguyen, R. Pang, I. Lopez Moreno, Y. Wu, et al., Transfer learning from speaker verification to multispeaker text-to-speech synthesis. Adv. Neural Inf. Process. Syst. 32, 1\u201311 (2019)."},{"key":"385_CR11","doi-asserted-by":"publisher","unstructured":"L. Zhang, N. Jiang, Q. Wang, Y. Li, Q. Lu, L. Xie, Whisper-SV: Adapting Whisper for low-data-resource speaker verification. Speech Commun. 103103 (2024). https:\/\/doi.org\/10.1016\/j.specom.2024.103103","DOI":"10.1016\/j.specom.2024.103103"},{"key":"385_CR12","doi-asserted-by":"publisher","first-page":"109420","DOI":"10.1016\/j.apacoust.2023.109420","volume":"209","author":"S Aziz","year":"2023","unstructured":"S. Aziz, S. Shahnawazuddin, Effective preservation of higher-frequency contents in the context of short utterance based children\u2019s speaker verification system. Appl. Acoust. 209, 109420 (2023). https:\/\/doi.org\/10.1016\/j.apacoust.2023.109420","journal-title":"Appl. Acoust."},{"key":"385_CR13","doi-asserted-by":"publisher","unstructured":"Z. Fan, M. Li, S. Zhou, B. Xu, in Proc. Interspeech 2021. Exploring wav2vec 2.0 on speaker verification and language identification (2021), pp. 1509\u20131513. https:\/\/doi.org\/10.21437\/Interspeech.2021-1280","DOI":"10.21437\/Interspeech.2021-1280"},{"key":"385_CR14","doi-asserted-by":"publisher","unstructured":"Y. Li, H. Huang, Z. Chen, W. Guan, J. Lin, L. Li, Q. Hong, in 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). SR-HuBERT: An efficient pre-trained model for speaker verification (2024), pp. 11591\u201311595. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10447606","DOI":"10.1109\/ICASSP48485.2024.10447606"},{"key":"385_CR15","doi-asserted-by":"publisher","unstructured":"Q. Hong, J. Zhang, L. Li, L. Wan, F. Tong, in 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). A transfer learning method for PLDA-based speaker verification (2016), pp. 5455\u20135459. https:\/\/doi.org\/10.1109\/ICASSP.2016.7472720","DOI":"10.1109\/ICASSP.2016.7472720"},{"key":"385_CR16","doi-asserted-by":"publisher","unstructured":"X. Qin, D. Cai, M. Li, in Proc. Interspeech 2019. Far-field end-to-end text-dependent speaker verification based on mixed training data with transfer learning and enrollment data augmentation (2019), pp. 4045\u20134049. https:\/\/doi.org\/10.21437\/Interspeech.2019-1542","DOI":"10.21437\/Interspeech.2019-1542"},{"key":"385_CR17","doi-asserted-by":"publisher","unstructured":"L. Zhang, Q. Wang, K.A. Lee, L. Xie, H. Li, in Proc. Interspeech 2021. Multi-level transfer learning from near-field to far-field speaker verification (2021), pp. 1094\u20131098. https:\/\/doi.org\/10.21437\/Interspeech.2021-1980","DOI":"10.21437\/Interspeech.2021-1980"},{"key":"385_CR18","doi-asserted-by":"publisher","unstructured":"M.J. Alam, G. Bhattacharya, P. Kenny, in Proc. The Speaker and Language Recognition Workshop (Odyssey 2018). Speaker verification in mismatched conditions with frustratingly easy domain adaptation (2018), pp. 176\u2013180. https:\/\/doi.org\/10.21437\/Odyssey.2018-25","DOI":"10.21437\/Odyssey.2018-25"},{"key":"385_CR19","doi-asserted-by":"publisher","unstructured":"P.M. Bousquet, M. Rouvier, in Proc. Interspeech 2019. On robustness of unsupervised domain adaptation for speaker recognition (2019), pp. 2958\u20132962. https:\/\/doi.org\/10.21437\/Interspeech.2019-1524","DOI":"10.21437\/Interspeech.2019-1524"},{"key":"385_CR20","doi-asserted-by":"publisher","unstructured":"Y. Zheng, J. Peng, Y. Chen, Y. Zhang, J. Wang, M. Liu, M. Xu.The SpeakIn Speaker Verification System for Far-Field Speaker Verification Challenge 2022. Proc. The 2022 Far-field Speaker Verification Challenge (FFSVC2022) (2022), pp. 15\u201319. https:\/\/doi.org\/10.21437\/FFSVC.2022-4","DOI":"10.21437\/FFSVC.2022-4"},{"key":"385_CR21","doi-asserted-by":"publisher","unstructured":"L. Zhang, Q. Wang, H. Wang, Y. Li, W. Rao, Y. Wang, L. Xie, in 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Distance-based weight transfer for fine-tuning from near-field to far-field speaker verification (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096790","DOI":"10.1109\/ICASSP49357.2023.10096790"},{"key":"385_CR22","doi-asserted-by":"publisher","first-page":"2378","DOI":"10.1109\/TASLP.2024.3389646","volume":"32","author":"J Li","year":"2024","unstructured":"J. Li, J. Han, F. Qian, T. Zheng, Y. He, G. Zheng, Distance metric-based open-set domain adaptation for speaker verification. IEEE\/ACM Trans. Audio Speech Lang. Process. 32, 2378\u20132390 (2024). https:\/\/doi.org\/10.1109\/TASLP.2024.3389646","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"385_CR23","doi-asserted-by":"publisher","unstructured":"N. Brummer, A. Mccree, S. Shum, D. Garcia-Romero, C. Vaquero, in Proc. The Speaker and Language Recognition Workshop (Odyssey 2014). Unsupervised domain adaptation for i-vector speaker recognition (2014), pp. 260\u2013264. https:\/\/doi.org\/10.21437\/Odyssey.2014-39","DOI":"10.21437\/Odyssey.2014-39"},{"key":"385_CR24","doi-asserted-by":"publisher","unstructured":"K.A. Lee, Q. Wang, T. Koshinaka, in 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). The CORAL+ algorithm for unsupervised domain adaptation of PLDA (2019), pp. 5821\u20135825. https:\/\/doi.org\/10.1109\/ICASSP.2019.8682852","DOI":"10.1109\/ICASSP.2019.8682852"},{"key":"385_CR25","doi-asserted-by":"publisher","unstructured":"R. Li, W. Zhang, D. Chen, in 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). The CORAL++ algorithm for unsupervised domain adaptation of speaker recognition (2022), pp. 7172\u20137176. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747792","DOI":"10.1109\/ICASSP43922.2022.9747792"},{"key":"385_CR26","doi-asserted-by":"publisher","unstructured":"Q. Wang, K. Okabe, K.A. Lee, T. Koshinaka, in 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). A generalized framework for domain adaptation of PLDA in speaker recognition (2020), pp. 6619\u20136623. https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054113","DOI":"10.1109\/ICASSP40776.2020.9054113"},{"key":"385_CR27","doi-asserted-by":"publisher","unstructured":"Q. Wang, W. Rao, S. Sun, L. Xie, E.S. Chng, H. Li, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Unsupervised domain adaptation via domain adversarial training for speaker recognition (2018), pp. 4889\u20134893. https:\/\/doi.org\/10.1109\/ICASSP.2018.8461423","DOI":"10.1109\/ICASSP.2018.8461423"},{"key":"385_CR28","doi-asserted-by":"publisher","unstructured":"H. Zhang, L. Wang, K.A. Lee, M. Liu, J. Dang, H. Chen, in 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Learning domain-invariant transformation for speaker verification (2022), pp. 7177\u20137181. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747514","DOI":"10.1109\/ICASSP43922.2022.9747514"},{"key":"385_CR29","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1109\/LSP.2022.3154237","volume":"29","author":"Y Wei","year":"2022","unstructured":"Y. Wei, J. Du, H. Liu, Z. Zhang, CentriForce: Multiple-domain adaptation for domain-invariant speaker representation learning. IEEE Signal Process. Lett. 29, 807\u2013811 (2022). https:\/\/doi.org\/10.1109\/LSP.2022.3154237","journal-title":"IEEE Signal Process. Lett."},{"key":"385_CR30","doi-asserted-by":"publisher","unstructured":"W. Huang, B. Han, S. Wang, Z. Chen, Y. Qian, in 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Robust cross-domain speaker verification with multi-level domain adapters (2024), pp. 11781\u201311785. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10446444","DOI":"10.1109\/ICASSP48485.2024.10446444"},{"key":"385_CR31","unstructured":"S.J. Pan, J.T. Kwok, Q. Yang et al., in AAAI. Transfer learning via dimensionality reduction, vol. 8 (AAAI Press, Menlo Park, CA, USA, 2008), pp. 677\u2013682."},{"key":"385_CR32","doi-asserted-by":"publisher","unstructured":"M. Sargin, E. Erzin, Y. Yemez, A. Tekalp, in 2006 IEEE International Conference on Acoustics Speech and Signal Processing Proceedings. Multimodal speaker identification using canonical correlation analysis, vol. 1 (2006), pp. I\u2013I. https:\/\/doi.org\/10.1109\/ICASSP.2006.1660095","DOI":"10.1109\/ICASSP.2006.1660095"},{"key":"385_CR33","doi-asserted-by":"publisher","unstructured":"X. Qin, H. Bu, M. Li, in 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). HI-MIA: A far-field text-dependent speaker verification database and the baselines (2020), pp. 7609\u20137613. https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054423","DOI":"10.1109\/ICASSP40776.2020.9054423"},{"issue":"10","key":"385_CR34","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"SJ Pan","year":"2010","unstructured":"S.J. Pan, Q. Yang, A survey on transfer learning. IEEE Trans. Knowl. Data Eng. 22(10), 1345\u20131359 (2010). https:\/\/doi.org\/10.1109\/TKDE.2009.191","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"385_CR35","unstructured":"K. Saito, Y. Ushiku, T. Harada, in International Conference on Machine Learning. Asymmetric tri-training for unsupervised domain adaptation (PMLR, La Jolla, CA, 2017), pp. 2988\u20132997."},{"key":"385_CR36","doi-asserted-by":"publisher","unstructured":"L. Ge, J. Gao, A. Zhang, in Proceedings of the 22nd ACM international conference on Information & Knowledge Management. OMS-TL: A framework of online multiple source transfer learning (2013), pp. 2423\u20132428. https:\/\/doi.org\/10.1145\/2505515.250560","DOI":"10.1145\/2505515.250560"},{"key":"385_CR37","doi-asserted-by":"publisher","unstructured":"J. Wang, Y. Chen, S. Hao, W. Feng, Z. Shen, in 2017 IEEE International Conference on Data Mining (ICDM). Balanced distribution adaptation for transfer learning (2017), pp. 1129\u20131134. https:\/\/doi.org\/10.1109\/ICDM.2017.150","DOI":"10.1109\/ICDM.2017.150"},{"key":"385_CR38","unstructured":"L. Mihalkova, T. Huynh, R.J. Mooney, in AAAI. Mapping and revising Markov logic networks for transfer learning, vol. 7. (AAAI Press, Menlo Park, CA, USA, 2007), pp. 608\u2013614."},{"issue":"1","key":"385_CR39","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"D.A. Reynolds, T.F. Quatieri, R.B. Dunn, Speaker verification using adapted Gaussian mixture models. Digit. Signal Process. 10(1), 19\u201341 (2000). https:\/\/doi.org\/10.1006\/dspr.1999.0361","journal-title":"Digit. Signal Process."},{"key":"385_CR40","doi-asserted-by":"publisher","unstructured":"J. Villalba, N. Chen, D. Snyder, D. Garcia-Romero, A. McCree, G. Sell, J. Borgstrom, F. Richardson, S. Shon, F. Grondin, R. Dehak, L.P. Garc\u00eda-Perera, D. Povey, P.A. Torres-Carrasquillo, S. Khudanpur, N. Dehak, in Proc. Interspeech 2019. State-of-the-art speaker recognition for telephone and video speech: the JHU-MIT submission for NIST SRE18 (2019), pp. 1488\u20131492. https:\/\/doi.org\/10.21437\/Interspeech.2019-2713","DOI":"10.21437\/Interspeech.2019-2713"},{"key":"385_CR41","doi-asserted-by":"publisher","unstructured":"D. Snyder, D. Garcia-Romero, G. Sell, A. McCree, D. Povey, S. Khudanpur, in 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Speaker recognition for multi-speaker conversations using x-vectors (2019), pp. 5796\u20135800. https:\/\/doi.org\/10.1109\/ICASSP.2019.8683760","DOI":"10.1109\/ICASSP.2019.8683760"},{"key":"385_CR42","doi-asserted-by":"publisher","unstructured":"K. He, X. Zhang, S. Ren, J. Sun, in 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Deep residual learning for image recognition (2016), pp. 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"385_CR43","doi-asserted-by":"publisher","unstructured":"D. Wang, Y. Ding, Q. Zhao, P. Yang, S. Tan, Y. Li. ECAPA-TDNN Based Depression Detection from Clinical Speech. Proc. Interspeech.\u00a02022, 3333-3337\u00a0(2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-10051","DOI":"10.21437\/Interspeech.2022-10051"},{"key":"385_CR44","doi-asserted-by":"publisher","unstructured":"H. Wang, S. Zheng, Y. Chen, L. Cheng, Q. Chen, in Proc. Interspeech 2023. CAM++: a fast and efficient network for speaker verification using context-aware masking (2023), pp. 5301\u20135305. https:\/\/doi.org\/10.21437\/Interspeech.2023-1513","DOI":"10.21437\/Interspeech.2023-1513"},{"key":"385_CR45","doi-asserted-by":"publisher","unstructured":"Y. Chen, S. Zheng, H. Wang, L. Cheng, Q. Chen, J. Qi, in Proc. Interspeech 2023. An enhanced Res2Net with local and global feature fusion for speaker verification (2023), pp. 2228\u20132232. https:\/\/doi.org\/10.21437\/Interspeech.2023-1294","DOI":"10.21437\/Interspeech.2023-1294"},{"key":"385_CR46","doi-asserted-by":"publisher","unstructured":"S.J. Prince, J.H. Elder, in 2007 IEEE 11th International Conference on Computer Vision. Probabilistic linear discriminant analysis for inferences about identity (2007), pp. 1\u20138. https:\/\/doi.org\/10.1109\/ICCV.2007.4409052","DOI":"10.1109\/ICCV.2007.4409052"},{"key":"385_CR47","unstructured":"N. Dehak, R. Dehak, J. Glass, D. Reynolds, P. Kenny, in Proc. The Speaker and Language Recognition Workshop (Odyssey 2010). Cosine similarity scoring without score normalization techniques (ISCA, Tucson, AZ, USA, 2010), p. paper 15."},{"key":"385_CR48","doi-asserted-by":"publisher","unstructured":"K. Livescu, M. Stoehr, in 2009 IEEE Workshop on Automatic Speech Recognition & Understanding. Multi-view learning of acoustic features for speaker recognition (2009), pp. 82\u201386. https:\/\/doi.org\/10.1109\/ASRU.2009.5373462","DOI":"10.1109\/ASRU.2009.5373462"},{"issue":"1","key":"385_CR49","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1121\/1.4954653","volume":"140","author":"RK Das","year":"2016","unstructured":"R.K. Das, S. Mahadeva\u00a0Prasanna, Exploring different attributes of source information for speaker verification with limited test data. J. Acoust. Soc. Am. 140(1), 184\u2013190 (2016). https:\/\/doi.org\/10.1121\/1.4954653","journal-title":"J. Acoust. Soc. Am."},{"key":"385_CR50","doi-asserted-by":"publisher","unstructured":"Z. Li, M. Zhao, J. Li, L. Li, Q. Hong, in Proc. Interspeech 2020. On the usage of multi-feature integration for speaker verification and language identification (2020), pp. 457\u2013461. https:\/\/doi.org\/10.21437\/Interspeech.2020-1960","DOI":"10.21437\/Interspeech.2020-1960"},{"key":"385_CR51","doi-asserted-by":"publisher","unstructured":"L. Xu, R.K. Das, E. Y\u0131lmaz, J. Yang, H. Li, in 2018 IEEE Spoken Language Technology Workshop (SLT). Generative x-vectors for text-independent speaker verification (2018), pp. 1014\u20131020. https:\/\/doi.org\/10.1109\/SLT.2018.8639510","DOI":"10.1109\/SLT.2018.8639510"},{"issue":"7","key":"385_CR52","doi-asserted-by":"publisher","first-page":"926","DOI":"10.1109\/LSP.2018.2822810","volume":"25","author":"F Wang","year":"2018","unstructured":"F. Wang, J. Cheng, W. Liu, H. Liu, Additive margin softmax for face verification. IEEE Signal Process. Lett. 25(7), 926\u2013930 (2018). https:\/\/doi.org\/10.1109\/LSP.2018.2822810","journal-title":"IEEE Signal Process. Lett."},{"key":"385_CR53","doi-asserted-by":"publisher","unstructured":"B. Han, Z. Chen, Y. Qian, in 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Exploring binary classification loss for speaker verification (2023), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10094954","DOI":"10.1109\/ICASSP49357.2023.10094954"},{"key":"385_CR54","doi-asserted-by":"publisher","unstructured":"Y. Liu, L. He, J. Liu, in Proc. Interspeech 2019. Large margin softmax loss for speaker verification (2019), pp. 2873\u20132877. https:\/\/doi.org\/10.21437\/Interspeech.2019-2357","DOI":"10.21437\/Interspeech.2019-2357"},{"key":"385_CR55","doi-asserted-by":"publisher","unstructured":"T. Ko, V. Peddinti, D. Povey, S. Khudanpur, in Proc. Interspeech 2015. Audio augmentation for speech recognition (2015), pp. 3586\u20133589. https:\/\/doi.org\/10.21437\/Interspeech.2015-711","DOI":"10.21437\/Interspeech.2015-711"},{"key":"385_CR56","doi-asserted-by":"publisher","unstructured":"V. Peddinti, D. Povey, S. Khudanpur, in Proc. Interspeech 2015. A time delay neural network architecture for efficient modeling of long temporal contexts (2015), pp. 3214\u20133218. https:\/\/doi.org\/10.21437\/Interspeech.2015-647","DOI":"10.21437\/Interspeech.2015-647"},{"issue":"3","key":"385_CR57","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1016\/j.specom.2003.10.002","volume":"42","author":"J Ram\u00edrez","year":"2004","unstructured":"J. Ram\u00edrez, J.C. Segura, C. Ben\u00edtez, A. de\u00a0la\u00a0Torre, A. Rubio, Efficient voice activity detection algorithms using long-term speech information. Speech Commun. 42(3), 271\u2013287 (2004). https:\/\/doi.org\/10.1016\/j.specom.2003.10.002","journal-title":"Speech Commun."},{"key":"385_CR58","unstructured":"I. Loshchilov, F. Hutter, in International Conference on Learning Representations (ICLR 2019). Decoupled weight decay regularization (OpenReview.net, 2019)."},{"key":"385_CR59","doi-asserted-by":"publisher","unstructured":"F. Tong, M. Zhao, J. Zhou, H. Lu, Z. Li, L. Li, Q. Hong, in 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). ASV-Subtools: Open source toolkit for automatic speaker verification (2021), pp. 6184\u20136188. https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414676","DOI":"10.1109\/ICASSP39728.2021.9414676"},{"key":"385_CR60","first-page":"1","volume":"32","author":"A Paszke","year":"2019","unstructured":"A. Paszke, S. Gross, F. Massa, A. Lerer, J. Bradbury, G. Chanan, T. Killeen, Z. Lin, N. Gimelshein, L. Antiga et al., PyTorch: An imperative style, high-performance deep learning library. Adv. Neural Inf. Process. Syst. 32, 1\u201312 (2019)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"385_CR61","unstructured":"D. Povey, A. Ghoshal, G. Boulianne, L. Burget, O. Glembek, N. Goel, M. Hannemann, P. Motlicek, Y. Qian, P. Schwarz et al., in IEEE 2011 Workshop on Automatic Speech Recognition and Understanding. The Kaldi speech recognition toolkit (IEEE Signal Processing Society, Hilton Waikoloa Village, Big Island, Hawaii, USA, 2011)."},{"issue":"86","key":"385_CR62","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"L. van der Maaten, G. Hinton, Visualizing data using t-SNE. J. Mach. Learn. Res. 9(86), 2579\u20132605 (2008)","journal-title":"J. Mach. Learn. Res."}],"container-title":["EURASIP Journal on Audio, Speech, and Music Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-024-00385-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s13636-024-00385-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-024-00385-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T06:04:22Z","timestamp":1734674662000},"score":1,"resource":{"primary":{"URL":"https:\/\/asmp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13636-024-00385-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,20]]},"references-count":62,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["385"],"URL":"https:\/\/doi.org\/10.1186\/s13636-024-00385-z","relation":{},"ISSN":["1687-4722"],"issn-type":[{"type":"electronic","value":"1687-4722"}],"subject":[],"published":{"date-parts":[[2024,12,20]]},"assertion":[{"value":"9 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Agree.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"64"}}