{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T11:02:05Z","timestamp":1740135725100,"version":"3.37.3"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2024,4,11]],"date-time":"2024-04-11T00:00:00Z","timestamp":1712793600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,11]],"date-time":"2024-04-11T00:00:00Z","timestamp":1712793600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 62176194"],"award-info":[{"award-number":["No. 62176194"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s00034-024-02666-6","type":"journal-article","created":{"date-parts":[[2024,4,11]],"date-time":"2024-04-11T12:01:56Z","timestamp":1712836916000},"page":"4508-4527","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Resformer: Local Frame-Level Feature and Global Segment-Level Feature Joint Learning for Speaker 
Verification"],"prefix":"10.1007","volume":"43","author":[{"given":"Yunfei","family":"Zi","sequence":"first","affiliation":[]},{"given":"Shengwu","family":"Xiong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,11]]},"reference":[{"key":"2666_CR1","doi-asserted-by":"publisher","first-page":"4903","DOI":"10.1007\/s00034-021-01697-7","volume":"40","author":"MT Al-Kaltakchi","year":"2021","unstructured":"M.T. Al-Kaltakchi, M.A. Abdullah, W.L. Woo, S.S. Dlay, Combined i-vector and extreme learning machine approach for robust speaker identification and evaluation with SITW 2016, NIST 2008, TIMIT Databases. Circuits Syst. Signal Process. 40, 4903\u20134923 (2021). https:\/\/doi.org\/10.1007\/s00034-021-01697-7","journal-title":"Circuits Syst. Signal Process."},{"key":"2666_CR2","doi-asserted-by":"publisher","unstructured":"M. T. Al-Kaltakchi, W. L. Woo, S. S. Dlay, & J. A. Chambers, Study of fusion strategies and exploiting the combination of MFCC and PNCC features for robust biometric speaker identification, in\u00a04th international conference on biometrics and forensics (IWBF 2016),\u00a0pp. 1\u20136 (2016). https:\/\/doi.org\/10.1109\/IWBF.2016.7449685","DOI":"10.1109\/IWBF.2016.7449685"},{"key":"2666_CR3","doi-asserted-by":"publisher","first-page":"21","DOI":"10.48550\/arXiv.1607.06450","volume":"1050","author":"JL Ba","year":"2016","unstructured":"J.L. Ba, J.R. Kiros, G.E. Hinton, Layer Normalization. Stat 1050, 21 (2016). https:\/\/doi.org\/10.48550\/arXiv.1607.06450","journal-title":"Stat"},{"issue":"12","key":"2666_CR4","doi-asserted-by":"publisher","first-page":"4235","DOI":"10.1109\/TCYB.2016.2603146","volume":"47","author":"G Biagetti","year":"2016","unstructured":"G. Biagetti, P. Crippa, L. Falaschetti, S. Orcioni, C. Turchetti, An investigation on the accuracy of truncated DKLT representation for speaker identification with short sequences of speech frames. IEEE trans. Cybern. 
47(12), 4235\u20134249 (2016). https:\/\/doi.org\/10.1109\/TCYB.2016.2603146","journal-title":"IEEE trans. Cybern."},{"issue":"9","key":"2666_CR5","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1109\/5.628714","volume":"85","author":"JP Campbell","year":"1997","unstructured":"J.P. Campbell, Speaker recognition: a tutorial. Proc. of the IEEE 85(9), 1437\u20131462 (1997). https:\/\/doi.org\/10.1109\/5.628714","journal-title":"Proc. of the IEEE"},{"issue":"2\u20133","key":"2666_CR6","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1016\/j.csl.2005.06.003","volume":"20","author":"WM Campbell","year":"2006","unstructured":"W.M. Campbell, J.P. Campbell, D.A. Reynolds, E. Singer, P.A. Torres-Carrasquillo, Support vector machines for speaker and language recognition. Comput. Speech Lang. 20(2\u20133), 210\u2013229 (2006). https:\/\/doi.org\/10.1016\/j.csl.2005.06.003","journal-title":"Comput. Speech Lang."},{"issue":"5","key":"2666_CR7","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1109\/LSP.2006.870086","volume":"13","author":"WM Campbell","year":"2006","unstructured":"W.M. Campbell, D.E. Sturim, D.A. Reynolds, Support vector machines using GMM supervectors for speaker verification. IEEE Signal Process. Lett. 13(5), 308\u2013311 (2006). https:\/\/doi.org\/10.1109\/LSP.2006.870086","journal-title":"IEEE Signal Process. Lett."},{"key":"2666_CR8","doi-asserted-by":"publisher","first-page":"1616","DOI":"10.1109\/TIFS.2019.2941773","volume":"15","author":"A Chowdhury","year":"2019","unstructured":"A. Chowdhury, A. Ross, Fusing MFCC and LPC features using 1D triplet CNN for speaker recognition in severely degraded audio signals. IEEE Trans. Inf. Forensics Secur. 15, 1616\u20131629 (2019). https:\/\/doi.org\/10.1109\/TIFS.2019.2941773","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"key":"2666_CR9","doi-asserted-by":"publisher","unstructured":"J. Chung, A. Nagrani, & A. 
Zisserman, VoxCeleb2: Deep speaker recognition,\u00a0in\u00a019st Annual conference of the International Speech Communication Association (Interspeech 2018), (pp. 1086\u20131090) (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1929","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"2666_CR10","doi-asserted-by":"publisher","unstructured":"Z. Dai, Z. Yang, Y. Yang, J. G. Carbonell, Q. Le, & R. Salakhutdinov, Transformer-XL: Attentive Language Models beyond a Fixed-Length Context, in\u00a0Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019), pp. 2978\u20132988 (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1285","DOI":"10.18653\/v1\/P19-1285"},{"issue":"1","key":"2666_CR11","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1121\/1.4954653","volume":"140","author":"RK Das","year":"2016","unstructured":"R.K. Das, S.R. Mahadeva Prasanna, Exploring different attributes of source information for speaker verification with limited test data. J. Acoust. Soc. Am. 140(1), 184\u2013190 (2016). https:\/\/doi.org\/10.1121\/1.4954653","journal-title":"J. Acoust. Soc. Am."},{"issue":"4","key":"2666_CR12","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"N. Dehak, P.J. Kenny, R. Dehak, P. Dumouchel, P. Ouellet, Front-end factor analysis for speaker verification. IEEE Trans. Audio Speech Lang. Process. 19(4), 788\u2013798 (2011). https:\/\/doi.org\/10.1109\/TASL.2010.2064307","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2666_CR13","doi-asserted-by":"publisher","unstructured":"B. Desplanques, J. Thienpondt, & K. Demuynck, ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN based speaker verification, in\u00a021st Annual conference of the International Speech Communication Association (Interspeech 2020), pp. 3830\u20133834 (2020). 
https:\/\/doi.org\/10.21437\/Interspeech.2020-2650","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"2666_CR14","doi-asserted-by":"publisher","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, ... & N. Houlsby, An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale, in\u00a02021 International Conference on Learning Representations. (ICLR 2021), (2021). https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"key":"2666_CR15","doi-asserted-by":"crossref","unstructured":"K. He, X. Zhang, S. Ren, & J. Sun, Deep residual learning for image recognition, in\u00a0Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR 2016), pp. 770\u2013778 (2016). https:\/\/openaccess.thecvf.com\/ content_cvpr_2016\/papers\/He_Deep_Residual_Learning_CVPR_2016_paper.pdf","DOI":"10.1109\/CVPR.2016.90"},{"key":"2666_CR16","unstructured":"S. Ioffe, & C. Szegedy, Batch normalization: Accelerating deep network training by reducing internal covariate shift, in\u00a02015 International conference on machine learning (ICML 2015), pp. 448\u2013456 (2015). https:\/\/proceedings.mlr.press\/ v37\/ioffe15.pdf"},{"key":"2666_CR17","doi-asserted-by":"publisher","unstructured":"J. W. Jung, H. S. Heo, H. J. Shim, & H. J. Yu, Short utterance compensation in speaker verification via cosine-based teacher-student learning of speaker embeddings, in\u00a02019 IEEE automatic speech recognition and understanding workshop (ASRU 2019), pp. 335\u2013341 (2019). https:\/\/doi.org\/10.1109\/ASRU46091.2019.9004029","DOI":"10.1109\/ASRU46091.2019.9004029"},{"key":"2666_CR18","doi-asserted-by":"publisher","unstructured":"P. Kenny, G. Boulianne, P. Ouellet, & P. Dumouchel, Joint factor analysis versus eigenchannels in speaker recognition.\u00a0IEEE Transactions on Audio, Speech, and Language Processing,\u00a015(4), 1435\u20131447(2007). 
https:\/\/doi.org\/10.1109\/TASL.2006.881693","DOI":"10.1109\/TASL.2006.881693"},{"key":"2666_CR19","doi-asserted-by":"publisher","unstructured":"J. H. Kim, H. J. Shim, J. Heo, & H. J. Yu, RawNeXt: Speaker verification system for variable-duration utterances with deep layer aggregation and extended dynamic scaling policies, in 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2022), pp. 7647\u20137651 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747594","DOI":"10.1109\/ICASSP43922.2022.9747594"},{"issue":"7","key":"2666_CR20","doi-asserted-by":"publisher","first-page":"1990","DOI":"10.1109\/TASL.2012.2191960","volume":"20","author":"T Kinnunen","year":"2012","unstructured":"T. Kinnunen, R. Saeidi, F. Sedl\u00e1k, K.A. Lee, J. Sandberg, M. Hansson-Sandsten, H. Li, Low-variance multitaper MFCC features: a case study in robust speaker verification. IEEE Trans. Audio Speech Lang. Process. 20(7), 1990\u20132001 (2012). https:\/\/doi.org\/10.1109\/TASL.2012.2191960","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2666_CR21","doi-asserted-by":"publisher","unstructured":"S. M. Kye, Y. Jung, H. B. Lee, S. J. Hwang, & H. R. Kim, Meta-Learning for Short Utterance Speaker Recognition with Imbalance Length Pairs, in\u00a021st Annual conference of the International Speech Communication Association (Interspeech 2020), pp. 2982\u20132986 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-1283","DOI":"10.21437\/Interspeech.2020-1283"},{"key":"2666_CR22","doi-asserted-by":"crossref","unstructured":"X. Li, W. Wang, X. Hu, & J. Yang, Selective kernel networks, in\u00a0Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR 2019), pp. 510\u2013519 (2019). https:\/\/openaccess.thecvf.com\/content_CVPR_2019\/papers\/Li_Selective_Kernel_Networks_CVPR_2019_paper.pdf","DOI":"10.1109\/CVPR.2019.00060"},{"key":"2666_CR23","doi-asserted-by":"crossref","unstructured":"L. Li, D. Wang, C. Zhang, & T. 
F. Zheng, Improving short utterance speaker recognition by modeling speech unit classes.\u00a0IEEE\/ACM Transactions on Audio, Speech, and Language Processing,\u00a024(6), 1129\u20131139 (2016).","DOI":"10.1109\/TASLP.2016.2544660"},{"key":"2666_CR24","doi-asserted-by":"publisher","unstructured":"T. Liu, R. K. Das, K. A. Lee, & H. Li, MFA: TDNN with multi-scale frequency-channel attention for text-independent speaker verification with short utterances, in 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2022), pp. 7517\u20137521 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747021","DOI":"10.1109\/ICASSP43922.2022.9747021"},{"key":"2666_CR25","doi-asserted-by":"publisher","unstructured":"B. Liu, H. Wang, Z. Chen, S. Wang, & Y. Qian, Self-knowledge distillation via feature enhancement for speaker verification, in 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2022), pp. 7542\u20137546 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746529","DOI":"10.1109\/ICASSP43922.2022.9746529"},{"issue":"7","key":"2666_CR26","doi-asserted-by":"publisher","first-page":"3244","DOI":"10.1109\/TII.2018.2799928","volume":"14","author":"Z Liu","year":"2018","unstructured":"Z. Liu, Z. Wu, T. Li, J. Li, C. Shen, GMM and CNN hybrid method for short utterance speaker recognition. IEEE Trans. Industr. Inf. 14(7), 3244\u20133252 (2018). https:\/\/doi.org\/10.1109\/TII.2018.2799928","journal-title":"IEEE Trans. Industr. Inf."},{"key":"2666_CR27","unstructured":"Morpho and Agnitio, Bring Voice Biometrics to Criminal ID. Available: https:\/\/findbiometrics.com\/morpho-and-agnitio-partner-bring-voice-biometrics-to-criminal-id-21261\/ (2018). Accessed 13 June 2018."},{"key":"2666_CR28","doi-asserted-by":"publisher","unstructured":"A. Nagrani, J. Chung, & A. 
Zisserman, VoxCeleb: a large-scale speaker identification dataset,\u00a0in\u00a018st Annual conference of the International Speech Communication Association (Interspeech 2017), pp. 2616\u20132620 (2017). https:\/\/doi.org\/10.21437\/Interspeech.2017-950","DOI":"10.21437\/Interspeech.2017-950"},{"key":"2666_CR29","doi-asserted-by":"publisher","unstructured":"S. Nakagawa, L. Wang, & S. Ohtsuka, Speaker identification and verification by combining MFCC and phase information.\u00a0IEEE transactions on audio, speech, and language processing,\u00a020(4), 1085\u20131095 (2012). https:\/\/doi.org\/10.1109\/TASL.2011.2172422","DOI":"10.1109\/TASL.2011.2172422"},{"key":"2666_CR30","unstructured":"A. Paszke, S. Gross, S. Chintala, G. Chanan, E. Yang, Z. DeVito, & A. Lerer, Automatic differentiation in pytorch,\u00a0in Advances in neural information processing systems (NIPS 2017), pp. 1\u20134 (2017). https:\/\/note.wcoder.com\/files\/ml\/ automatic_ differentiation_ in_pytorch.pdf"},{"key":"2666_CR31","doi-asserted-by":"publisher","unstructured":"D. Povey, G. Cheng, Y. Wang, K. Li, H. Xu, M. Yarmohammadi, & S. Khudanpur, Semi-orthogonal low-rank matrix factorization for deep neural networks, in\u00a019st Annual conference of the International Speech Communication Association (Interspeech 2018), pp. 3743\u20133747 (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1417","DOI":"10.21437\/Interspeech.2018-1417"},{"issue":"1\u20133","key":"2666_CR32","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"D.A. Reynolds, T.F. Quatieri, R.B. Dunn, Speaker verification using adapted Gaussian mixture models. Digit. Signal Process. 10(1\u20133), 19\u201341 (2000). https:\/\/doi.org\/10.1006\/dspr.1999.0361","journal-title":"Digit. 
Signal Process."},{"issue":"2","key":"2666_CR33","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1109\/LSP.2012.2235067","volume":"20","author":"M Sahidullah","year":"2012","unstructured":"M. Sahidullah, G. Saha, A novel windowing technique for efficient computation of MFCC for speaker recognition. IEEE Signal Process. Lett. 20(2), 149\u2013152 (2012). https:\/\/doi.org\/10.1109\/LSP.2012.2235067","journal-title":"IEEE Signal Process. Lett."},{"key":"2666_CR34","doi-asserted-by":"publisher","unstructured":"M. Sang, W. Xia, & J. H. Hansen, Open-set Short Utterance Forensic Speaker Verification using Teacher-Student Network with Explicit Inductive Bias, in\u00a021st Annual conference of the International Speech Communication Association (Interspeech 2020), pp. 2262\u20132266 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2868","DOI":"10.21437\/Interspeech.2020-2868"},{"key":"2666_CR35","doi-asserted-by":"publisher","unstructured":"F. Schroff, D. Kalenichenko, & J. Philbin, Facenet: A unified embedding for face recognition and clustering, in\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR 2015), pp. 815\u2013823 (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298682","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"2666_CR36","doi-asserted-by":"publisher","unstructured":"D. Snyder, D. Garcia-Romero, G. Sell, A. McCree, D. Povey, & S. Khudanpur, Speaker recognition for multi-speaker conversations using x-vectors, in 2019 IEEE International conference on acoustics, speech and signal processing (ICASSP 2019), pp. 5796\u20135800 (2019). https:\/\/doi.org\/10.1109\/ICASSP.2019.8683760","DOI":"10.1109\/ICASSP.2019.8683760"},{"key":"2666_CR37","doi-asserted-by":"publisher","unstructured":"D. Snyder, D. Garcia-Romero, G. Sell, D. Povey, & S. Khudanpur, X-vectors: Robust dnn embeddings for speaker recognition, in\u00a02018 IEEE international conference on acoustics, speech and signal processing (ICASSP 2018), pp. 
5329\u20135333 (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8461375","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"2666_CR38","unstructured":"H. Touvron, M. Cord, M. Douze, F. Massa, A. Sablayrolles, & H. J\u00e9gou, Training data-efficient image transformers & distillation through attention, in\u00a02021 International conference on machine learning (ICML 2021), pp. 10347\u201310357 (2021). https:\/\/proceedings.mlr.press\/v139\/touvron21a\/touvron21a.pdf"},{"key":"2666_CR39","doi-asserted-by":"publisher","unstructured":"Y. Tu, & M. W. Mak, Short-time spectral aggregation for speaker embedding, in 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2021), pp. 6708\u20136712 (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414094","DOI":"10.1109\/ICASSP39728.2021.9414094"},{"key":"2666_CR40","doi-asserted-by":"publisher","unstructured":"Y. Tu, & M. W. Mak, Aggregating frame-level information in the spectral domain with self-attention for speaker embedding.\u00a0IEEE\/ACM Transactions on Audio, Speech, and Language Processing,\u00a030, 944\u2013957(2022). https:\/\/doi.org\/10.1109\/TASLP.2022.3153267","DOI":"10.1109\/TASLP.2022.3153267"},{"key":"2666_CR41","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A. N. Gomez, & I. Polosukhin, Attention is all you need,\u00a0in Advances in neural information processing systems(NIPS 2017), pp. 6000\u20136010 (2017). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"2666_CR42","doi-asserted-by":"publisher","first-page":"101026","DOI":"10.1016\/j.csl.2019.101026","volume":"60","author":"J Villalba","year":"2020","unstructured":"J. Villalba, N. Chen, D. Snyder, D. Garcia-Romero, A. McCree, G. Sell, N. Dehak, State-of-the-art speaker recognition with neural network embeddings in NIST SRE18 and speakers in the wild evaluations. Comput. Speech Lang. 60, 101026 (2020). 
https:\/\/doi.org\/10.1016\/j.csl.2019.101026","journal-title":"Comput. Speech Lang."},{"issue":"6","key":"2666_CR43","doi-asserted-by":"publisher","first-page":"1182","DOI":"10.1109\/TASL.2009.2031505","volume":"18","author":"R Vogt","year":"2009","unstructured":"R. Vogt, S. Sridharan, M. Mason, Making confident speaker verification decisions with minimal speech. IEEE Trans. Audio Speech Lang. Process. 18(6), 1182\u20131192 (2009). https:\/\/doi.org\/10.1109\/TASL.2009.2031505","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2666_CR44","doi-asserted-by":"publisher","unstructured":"Z. Wang, & J. H. Hansen, Multi-source domain adaptation for text-independent forensic speaker recognition.\u00a0IEEE\/ACM Transactions on Audio, Speech, and Language Processing,\u00a030, 60\u201375 (2022). https:\/\/doi.org\/10.1109\/TASLP.2021.3130975","DOI":"10.1109\/TASLP.2021.3130975"},{"issue":"11","key":"2666_CR45","doi-asserted-by":"publisher","first-page":"1686","DOI":"10.1109\/TASLP.2019.2928128","volume":"27","author":"S Wang","year":"2019","unstructured":"S. Wang, Z. Huang, Y. Qian, K. Yu, Discriminative neural embedding learning for short-duration text-independent speaker verification. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 27(11), 1686\u20131696 (2019). https:\/\/doi.org\/10.1109\/TASLP.2019.2928128","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"2666_CR46","doi-asserted-by":"crossref","unstructured":"Q. Wang, B. Li, T. Xiao, J. Zhu, C. Li, D. F. Wong, & L. S. Chao, Learning Deep Transformer Models for Machine Translation, in\u00a0Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019), pp. 1810\u20131822 (2019). https:\/\/aclanthology.org\/P19-1176.pdf","DOI":"10.18653\/v1\/P19-1176"},{"key":"2666_CR47","doi-asserted-by":"publisher","unstructured":"S. Yadav, & A. 
Rai, Frequency and temporal convolutional attention for text-independent speaker recognition, in\u00a02020 IEEE international conference on acoustics, speech and signal processing (ICASSP 2020), pp. 6794\u20136798 (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054440","DOI":"10.1109\/ICASSP40776.2020.9054440"},{"key":"2666_CR48","doi-asserted-by":"publisher","unstructured":"D. Zhu, & N. Chen, Multi-Source Domain Adaptation and Fusion for Speaker Verification.\u00a0IEEE\/ACM Transactions on Audio, Speech, and Language Processing,\u00a030, 2103-2116 (2022). https:\/\/doi.org\/10.1109\/TASLP.2022.3182271","DOI":"10.1109\/TASLP.2022.3182271"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-024-02666-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-024-02666-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-024-02666-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T11:14:58Z","timestamp":1721128498000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-024-02666-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,11]]},"references-count":48,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["2666"],"URL":"https:\/\/doi.org\/10.1007\/s00034-024-02666-6","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2024,4,11]]},"assertion":[{"value":"25 February 
2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 March 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 March 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 April 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no conflicts of interest regarding the publication of this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interest"}}]}}