{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:26:36Z","timestamp":1740122796493,"version":"3.37.3"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,11,5]],"date-time":"2023-11-05T00:00:00Z","timestamp":1699142400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,5]],"date-time":"2023-11-05T00:00:00Z","timestamp":1699142400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","award":["RGPIN-2018-05221"],"award-info":[{"award-number":["RGPIN-2018-05221"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10772-023-10059-4","type":"journal-article","created":{"date-parts":[[2023,11,5]],"date-time":"2023-11-05T13:01:25Z","timestamp":1699189285000},"page":"881-894","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Attention-based factorized TDNN for a noise-robust and spoof-aware speaker verification system"],"prefix":"10.1007","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1764-9078","authenticated-orcid":false,"given":"Zhor","family":"Benhafid","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0731-2632","authenticated-orcid":false,"given":"Sid 
Ahmed","family":"Selouani","sequence":"additional","affiliation":[]},{"given":"Abderrahmane","family":"Amrouche","sequence":"additional","affiliation":[]},{"given":"Mohammed","family":"Sidi Yakoub","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,5]]},"reference":[{"key":"10059_CR1","doi-asserted-by":"crossref","unstructured":"Alenin, A., Torgashov, N., Okhotnikov, A., Makarov, R., & Yakovlev, I. (2022). A subnetwork approach for spoofing aware speaker verification. In Proceedings in Interspeech 2022 (pp. 2888\u20132892).","DOI":"10.21437\/Interspeech.2022-10921"},{"key":"10059_CR2","doi-asserted-by":"crossref","unstructured":"Benhafid, Z., Selouani, S. A., & Amrouche, A. (2023). Light-spinenet variational autoencoder for logical access spoof utterances detection in speaker verification systems. In Proceedings in bios-mart (pp. 1\u20134).","DOI":"10.1109\/BioSMART58455.2023.10162119"},{"key":"10059_CR3","doi-asserted-by":"crossref","unstructured":"Benhafid, Z., Selouani, S. A., Yakoub, M. S., & Amrouche, A. (2021). LARIHS ASSERT reassessment for logical access ASVspoof 2021 Challenge. In Proceedings of 2021 edition of the automatic speaker verification and spoofing countermeasures challenge (pp. 94\u201399).","DOI":"10.21437\/ASVSPOOF.2021-15"},{"key":"10059_CR4","doi-asserted-by":"crossref","unstructured":"Bogdanov, D., Wack, N., G\u00f3mez, E., Gulati, S., Herrera, P., Mayor, O., & Serra, X. (2013). Essentia: An audio analysis library for music information retrieval. In Proceedings of the 14th international society for music information retrieval conference, (ISMIR 2013).","DOI":"10.1145\/2502081.2502229"},{"key":"10059_CR5","doi-asserted-by":"crossref","unstructured":"Cai, D., & Li, M. (2021). Embedding aggregation for far-field speaker verification with distributed microphone arrays. In 2021 IEEE spoken language technology workshop (SLT) (pp. 
308\u2013315).","DOI":"10.1109\/SLT48900.2021.9383501"},{"key":"10059_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Z., & Lin, Y. (2020). Improving X-vector and PLDA for text-dependent speaker verification. In Proceedings of Interspeech, 2020, 726\u2013730.","DOI":"10.21437\/Interspeech.2020-1188"},{"key":"10059_CR7","doi-asserted-by":"crossref","unstructured":"Choi, J. -H., Yang, J. -Y., Jeoung, Y. -R., & Chang, J. -H. (2022). HYU submission for the SASV challenge 2022: Reforming speaker embeddings with spoofing-aware conditioning. In Proceedings Interspeech 2022 (pp. 2873-2877).","DOI":"10.21437\/Interspeech.2022-210"},{"key":"10059_CR8","doi-asserted-by":"crossref","unstructured":"Chung, J. S., Nagrani, A., & Zisserman, A. (2018). VoxCeleb2: Deep speaker recognition. In Interspeech, 2018, 1086\u20131090. Retrieved from https:\/\/arxiv.org\/abs\/1806.05622v2","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"10059_CR9","doi-asserted-by":"crossref","unstructured":"Desplanques, B., Thienpondt, J., & Demuynck, K. (2020). ECAPA-TDNN: Emphasized channel attention, propagation and aggregation in TDNN based speaker verification. In Proceedings Interspeech 2020 (Vol. 2020-Oct, pp. 3830\u20133834).","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"10059_CR10","doi-asserted-by":"crossref","unstructured":"Gao, Z., Mak, M. -W., & Lin, W. (2022). UNet-DenseNet for robust far-field speaker verification. In Proceedings Interspeech (pp. 3714\u20133718).","DOI":"10.21437\/Interspeech.2022-10350"},{"key":"10059_CR11","doi-asserted-by":"publisher","first-page":"1579","DOI":"10.1109\/TIFS.2020.3039045","volume":"16","author":"A Gomez-Alanis","year":"2021","unstructured":"Gomez-Alanis, A., Gonzalez-Lopez, J. A., Dubagunta, S. P., Peinado, A. M., & Magimai.-Doss, M. (2021). On joint optimization of automatic speaker verification and anti-spoofing in the embedding space. IEEE Transactions on Information Forensics and Security, 16, 1579\u20131593. 
https:\/\/doi.org\/10.1109\/TIFS.2020.3039045","journal-title":"IEEE Transactions on Information Forensics and Security"},{"key":"10059_CR12","doi-asserted-by":"crossref","unstructured":"Gusev, A., Volokhov, V., Andzhukaev, T., Novoselov, S., Lavrentyeva, G., Volkova, M., & Matveev, Y. (2020). Deep speaker embeddings for far-field speaker recognition on short utterances. In The speaker and language recognition workshop (Odyssey 2020) (pp. 179\u2013186).","DOI":"10.21437\/Odyssey.2020-26"},{"key":"10059_CR13","doi-asserted-by":"crossref","unstructured":"Hao, X., Su, X., Horaud, R., & Li, X. (2021). Fullsubnet: A full-band and sub-band fusion model for real-time single-channel speech enhancement. In ICASSP 2021\u20142021 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 6633\u20136637).","DOI":"10.1109\/ICASSP39728.2021.9414177"},{"key":"10059_CR14","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015, Dec). Deep residual learning for image recognition. In Proceedings of the IEEE computer society conference on computer vision and pattern recognition (Vol. 2016-Dec, pp. 770\u2013778). Retrieved from https:\/\/arxiv.org\/abs\/1512.03385v1"},{"key":"10059_CR15","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., & Weinberger, K. Q. (2017). Densely connected convolutional networks. In 2017 IEEE conference on computer vision and pattern recognition (CVPR) (pp. 2261\u20132269).","DOI":"10.1109\/CVPR.2017.243"},{"key":"10059_CR16","doi-asserted-by":"crossref","unstructured":"Jeevan, M., Dhingra, A., Hanmandlu, M., & Panigrahi, B. (2017). Robust speaker verification using GFCC based i-vectors. In Proceedings of the international conference on signal, networks, computing, and systems (pp. 85\u201391).","DOI":"10.1007\/978-81-322-3592-7_9"},{"key":"10059_CR17","unstructured":"Jee-weon, J., Tak, H., Jin Shim, H., Heo, H. -S., Lee, B. -J., Chung, S. -W., & Kinnunen, T. (2022). 
SASV 2022: The first spoofing-aware speaker verification challenge. In Proceedings Interspeech 2022 (pp. 2893\u20132897)."},{"key":"10059_CR18","doi-asserted-by":"crossref","unstructured":"Jung, J. -w., Heo, H. -S., Tak, H., Shim, H.-j., Chung, J. S., Lee, B. -J., & Evans, N. (2022). Aasist: Audio anti-spoofing using integrated spectro-temporal graph attention networks. In ICASSP 2022 - 2022 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 6367\u20136371).","DOI":"10.1109\/ICASSP43922.2022.9747766"},{"key":"10059_CR19","doi-asserted-by":"crossref","unstructured":"Jung, J. -W., Kim, J. -H., Shim, H. -J., Kim, S. -b., & Yu, H. -J. (2020, May). Selective deep speaker embedding enhancement for speaker verification. In Odyssey 2020 the speaker and language recognition workshop (pp. 171\u2013178).","DOI":"10.21437\/Odyssey.2020-25"},{"key":"10059_CR20","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1109\/TASLP.2021.3138681","volume":"30","author":"A Kanervisto","year":"2022","unstructured":"Kanervisto, A., Hautam\u00e4ki, V., Kinnunen, T., & Yamagishi, J. (2022). Optimizing tandem speaker verification and anti-spoofing systems. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 30, 477\u2013488. https:\/\/doi.org\/10.1109\/TASLP.2021.3138681","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10059_CR21","unstructured":"Kenny, P. (2010). Bayesian speaker verification with heavy-tailed priors. Odyssey."},{"key":"10059_CR22","doi-asserted-by":"crossref","unstructured":"Kim, J. -H., Heo, J., Jin Shim, H., & Yu, H. -J. (2022). Extended U-net for speaker verification in noisy environments. In Proceedings Interspeech 2022 (pp. 590\u2013594).","DOI":"10.21437\/Interspeech.2022-155"},{"key":"10059_CR23","doi-asserted-by":"crossref","unstructured":"Ko, T., Peddinti, V., Povey, D., Seltzer, M. L., & Khudanpur, S. (2017, Jun). 
A study on data augmentation of reverberant speech for robust speech recognition. In IEEE international conference on acoustics, speech and signal processing - proceedings (ICASSP) (pp. 5220\u20135224).","DOI":"10.1109\/ICASSP.2017.7953152"},{"issue":"11","key":"10059_CR24","doi-asserted-by":"publisher","first-page":"16195","DOI":"10.1007\/s11042-022-14068-4","volume":"82","author":"A Krobba","year":"2023","unstructured":"Krobba, A., Debyeche, M., & Selouani, S. A. (2023). A novel hybrid feature method based on Caelen auditory model and gammatone filterbank for robust speaker recognition under noisy environment and speech coding distortion. Multimedia Tools and Applications, 82(11), 16195\u201316212. https:\/\/doi.org\/10.1007\/s11042-022-14068-4","journal-title":"Multimedia Tools and Applications"},{"key":"10059_CR25","doi-asserted-by":"publisher","unstructured":"Kumar Nandwana, M., Van Hout, J., Richey, C., Mclaren, M., Barrios, M. A., & Lawson, A. (2019). The VOiCES from a distance challenge 2019. In Interspeech 2019 (pp. 2438\u20132442). Retrieved from https:\/\/doi.org\/10.21437\/Interspeech.2019-1837","DOI":"10.21437\/Interspeech.2019-1837"},{"key":"10059_CR26","doi-asserted-by":"crossref","unstructured":"Liu, T., Das, R. K., Aik Lee, K., & Li, H. (2022). MFA: TDNN with multi-scale frequency-channel attention for text-independent speaker verification with short utterances. In ICASSP 2022 - 2022 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 7517\u20137521).","DOI":"10.1109\/ICASSP43922.2022.9747021"},{"key":"10059_CR27","doi-asserted-by":"crossref","unstructured":"Liu, X., Sahidullah, M., & Kinnunen, T. (2020). A comparative Re-assessment of feature extractors for deep speaker embeddings. In Proceedings Interspeech, 2020, 3221\u20133225.","DOI":"10.21437\/Interspeech.2020-1765"},{"key":"10059_CR28","doi-asserted-by":"crossref","unstructured":"Liu, X., Sahidullah, M., & Kinnunen, T. (2021a). 
Optimized power normalized cepstral coefficients towards robust deep speaker verification. In 2021 IEEE automatic speech recognition and understanding workshop - proceedings (ASRU 2021)  (pp. 185\u2013190).","DOI":"10.1109\/ASRU51503.2021.9688006"},{"key":"10059_CR29","doi-asserted-by":"publisher","first-page":"2187","DOI":"10.1109\/LSP.2021.3122796","volume":"28","author":"X Liu","year":"2021","unstructured":"Liu, X., Sahidullah, M., & Kinnunen, T. (2021). Optimizing multi-taper features for deep speaker verification. IEEE Signal Processing Letters, 28, 2187\u20132191. https:\/\/doi.org\/10.1109\/LSP.2021.3122796","journal-title":"IEEE Signal Processing Letters"},{"key":"10059_CR30","doi-asserted-by":"crossref","unstructured":"Min Kye, S., Kwon, Y., & Son Chung, J. (2021). Cross attentive pooling for speaker verification. In 2021 IEEE spoken language technology workshop (SLT) (pp. 294\u2013300).","DOI":"10.1109\/SLT48900.2021.9383565"},{"key":"10059_CR31","doi-asserted-by":"crossref","unstructured":"Mohammadamini, M., Matrouf, D., Bonastre, J. -F., Dowerah, S., Serizel, R., & Jouvet, D. (2022). A comprehensive exploration of noise robustness and noise compensation in resnet and TDNN-based speaker recognition systems. In Eusipco 2022-30th European signal processing conference.","DOI":"10.23919\/EUSIPCO55093.2022.9909726"},{"key":"10059_CR32","doi-asserted-by":"crossref","unstructured":"Mo\u0161ner, L., Plchot, O., Burget, L., & \u010cernock\u1ef3, J. H. (2022). Multisv: Dataset for far-field multichannel speaker verification. In ICASSP 2022 - 2022 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 7977\u20137981).","DOI":"10.1109\/ICASSP43922.2022.9746833"},{"key":"10059_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/J.CSL.2019.101027","volume":"60","author":"A Nagrani","year":"2020","unstructured":"Nagrani, A., Chung, J. S., Xie, W., & Zisserman, A. (2020). Voxceleb: Large-scale speaker verification in the wild. 
Computer Speech & Language, 60, 101027. https:\/\/doi.org\/10.1016\/J.CSL.2019.101027","journal-title":"Computer Speech & Language"},{"key":"10059_CR34","doi-asserted-by":"crossref","unstructured":"Nagraniy, A., Chungy, J. S., & Zisserman, A. (2017). VoxCeleb: A large-scale speaker identification dataset. In Interspeech 2017 (pp. 2616\u20132620).","DOI":"10.21437\/Interspeech.2017-950"},{"key":"10059_CR35","doi-asserted-by":"crossref","unstructured":"Okabe, K., Koshinaka, T., & Shinoda, K. (2018). Attentive statistics pooling for deep speaker embedding. In Proceedings Interspeech, 2018, 2252\u20132256.","DOI":"10.21437\/Interspeech.2018-993"},{"key":"10059_CR36","doi-asserted-by":"crossref","unstructured":"Povey, D., Cheng, G., Wang, Y., Li, K., Xu, H., Yarmohamadi, M., & Khudanpur, S. (2018). Semi-orthogonal low-rank matrix factorization for deep neural networks. In Proceedings of the annual conference of the international speech communication association, Interspeech, 2018, 3743\u20133747.","DOI":"10.21437\/Interspeech.2018-1417"},{"key":"10059_CR37","unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., & Vesely, K. V. (2011). The Kaldi Speech Recognition Toolkit. In IEEE 2011 workshop on automatic speech recognition and understanding. Hilton Waikoloa Village, Big Island, Hawaii, US."},{"key":"10059_CR38","doi-asserted-by":"crossref","unstructured":"Povey, D., Hadian, H., Ghahremani, P., Li, K., & Khudanpur, S. (2018, Sep). A time-restricted self-attention layer for ASR. In IEEE international conference on acoustics, speech and signal processing - proceedings (ICASSP), 2018, 5874\u20135878).","DOI":"10.1109\/ICASSP.2018.8462497"},{"key":"10059_CR39","unstructured":"Povey, D., Zhang, X., & Khudanpur, S. (2015). Parallel training of DNNs with natural gradient and parameter averaging. 
In 3rd international conference on learning representations, (ICLR 2015) - workshop track proceedings."},{"key":"10059_CR40","unstructured":"Qin, X., Li, M., Bu, H., Narayanan, S., & Li, H. (2022). Far-field speaker verification challenge (FFSVC) 2022: Challenge evaluation plan."},{"key":"10059_CR41","doi-asserted-by":"crossref","unstructured":"Qin, X., Li, M., Bu, H., Rao, W., Das, R. K., Narayanan, S., & Li, H. (2020). The INTERSPEECH 2020 far-field speaker verification challenge. In Proceedings Interspeech 2020 (pp. 3456\u20133460).","DOI":"10.21437\/Interspeech.2020-1249"},{"key":"10059_CR42","doi-asserted-by":"crossref","unstructured":"Richey, C., Barrios, M. A., Armstrong, Z., Bartels, C., Franco, H., Graciarena, M., & Ni, K. (2018). Voices obscured in complex environmental settings (VOICES) corpus. In Proceedings of the annual conference of the international speech communication association, Interspeech, 2018, 1566\u20131570.","DOI":"10.21437\/Interspeech.2018-1454"},{"key":"10059_CR43","doi-asserted-by":"crossref","unstructured":"Rybicka, M., Villalba, J., Zelasko, P., Dehak, N., & Kowalczyk, K. (2021). Spine2net: Spinenet with res2net and time-squeeze and - excitation blocks for speaker recognition. In Proceedings Interspeech, 1, 491\u2013495.","DOI":"10.21437\/Interspeech.2021-1163"},{"key":"10059_CR44","doi-asserted-by":"crossref","unstructured":"Segbroeck, M.V., Zaid, A., Kutsenko, K., Huerta, C., Nguyen, T., Luo, X., & Maas, R. (2020). DiPCo Dinner Party Corpus. In Proceedings Interspeech 2020 (pp. 434\u2013436).","DOI":"10.21437\/Interspeech.2020-2800"},{"key":"10059_CR45","unstructured":"Shao, Y., & Wang, D. L. (2008). Robust speaker identification using auditory features and computational auditory scene analysis. 
In  IEEE international conference on acoustics, speech and signal processing - proceedings (ICASSP)."},{"issue":"18","key":"10059_CR46","doi-asserted-by":"publisher","first-page":"6292","DOI":"10.3390\/app10186292","volume":"10","author":"H Shim","year":"2020","unstructured":"Shim, H.-j., Jung, J.-w., Kim, J.-h., Kim, S.-b, & Yu, H.-j. (2020). Integrated replay spoofing-aware text-independent speaker verification. Applied Sciences, 10(18), 6292. https:\/\/doi.org\/10.3390\/app10186292","journal-title":"Applied Sciences"},{"key":"10059_CR47","doi-asserted-by":"crossref","unstructured":"Shtrosberg, A., Villalba, J., Dehak, N., Cohen, A., & Ben-Yair, B. (2021). Invariant representation learning for robust far-field speaker recognition. In International conference on statistical language and speech processing (pp. 97\u2013110).","DOI":"10.1007\/978-3-030-89579-2_9"},{"key":"10059_CR48","unstructured":"Snyder, D., Chen, G., & Povey, D. (2015). MUSAN: A music, speech, and noise corpus. arXiv preprint, Retrieved from arXiv:1510.08484v1 http:\/\/www.itl.nist.gov\/iad\/mig\/tests\/sre\/"},{"key":"10059_CR49","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Povey, D., & Khudanpur, S. (2017). Deep neural network embeddings for text-independent speaker verification. In Proceedings Interspeech 2017 (pp. 999\u20131003).","DOI":"10.21437\/Interspeech.2017-620"},{"key":"10059_CR50","doi-asserted-by":"crossref","unstructured":"Snyder, D., Garcia-Romero, D., Sell, G., Povey, D., & Khudanpur, S. (2018). XVectors: Robust DNN embeddings for speaker recognition. In IEEE international conference on acoustics, speech and signal processing - proceedings (ICASSP), 2018, 5329\u20135333.","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"10059_CR51","doi-asserted-by":"publisher","first-page":"1293","DOI":"10.1109\/TASLP.2020.2986896","volume":"28","author":"H Taherian","year":"2020","unstructured":"Taherian, H., Wang, Z. Q., Chang, J., & Wang, D. (2020). 
Robust speaker recognition based on single-channel and multi-channel speech enhancement. IEEE\/ACM Transactions on Audio Speech and Language Processing, 28, 1293\u20131302. https:\/\/doi.org\/10.1109\/TASLP.2020.2986896","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"key":"10059_CR52","doi-asserted-by":"crossref","unstructured":"Thienpondt, J., Desplanques, B., & Demuynck, K. (2021). Integrating frequency translational invariance in TDNNs and frequency positional information in 2D ResNets to enhance speaker verification. In Proceedings Interspeech, 3, 2018\u20132022.","DOI":"10.21437\/Interspeech.2021-1570"},{"issue":"6","key":"10059_CR53","doi-asserted-by":"publisher","first-page":"1684","DOI":"10.1109\/TMM.2012.2199972","volume":"14","author":"X Valero","year":"2012","unstructured":"Valero, X., & Alias, F. (2012). Gammatone cepstral coefficients: Biologically inspired features for non-speech audio classification. IEEE Transactions on Multimedia, 14(6), 1684\u20131689. https:\/\/doi.org\/10.1109\/TMM.2012.2199972","journal-title":"IEEE Transactions on Multimedia"},{"key":"10059_CR54","doi-asserted-by":"crossref","unstructured":"Variani, E., Lei, X., Mcdermott, E., Lopez Moreno, I., & Gonzalez-Dominguez, J. (2014). Deep neural networks for small footprint text-dependent speaker verification. In 2014 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 4052\u20134056).","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"10059_CR55","doi-asserted-by":"publisher","unstructured":"Villalba, J., Chen, N., Snyder, D., Garcia-Romero, D., McCree, A., Sell, G., & Dehak, N. (2020). State-of-the-art speaker recognition with neural network embeddings in NIST SRE18 and speakers in the wild evaluations. Computer Speech & Language, 60, 101026. 
https:\/\/doi.org\/10.1016\/J.CSL.2019.101026","DOI":"10.1016\/J.CSL.2019.101026"},{"issue":"6","key":"10059_CR56","doi-asserted-by":"publisher","first-page":"2147","DOI":"10.3390\/s22062147","volume":"22","author":"M Wang","year":"2022","unstructured":"Wang, M., Feng, D., Su, T., & Chen, M. (2022). Attention-based temporal-frequency aggregation for speaker verification. Sensors, 22(6), 2147. https:\/\/doi.org\/10.3390\/s22062147","journal-title":"Sensors"},{"key":"10059_CR57","doi-asserted-by":"crossref","unstructured":"Wang, X., Qin, X., Wang, Y., Xu, Y., & Li, M. (2022). The DKU-OPPO system for the 2022 spoofing-aware speaker verification challenge. In Proceedings Interspeech, (pp. 4396\u20134400).","DOI":"10.21437\/Interspeech.2022-11190"},{"key":"10059_CR58","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2020.101114","volume":"64","author":"X Wang","year":"2020","unstructured":"Wang, X., Yamagishi, J., Todisco, M., Delgado, H., Nautsch, A., Evans, N., & Ling, Z.-H. (2020). Asvspoof 2019: A large-scale public database of synthesized, converted and replayed speech. Computer Speech & Language, 64, 101114. https:\/\/doi.org\/10.1016\/j.csl.2020.101114","journal-title":"In Computer Speech & Language"},{"key":"10059_CR59","first-page":"921","volume":"2020","author":"YQ Yu","year":"2020","unstructured":"Yu, Y. Q., & Li, W. J. (2020). Densely connected time delay neural network for speaker verification. In Proceedings Interspeech, 2020, 921\u2013925.","journal-title":"In Proceedings Interspeech"},{"key":"10059_CR60","first-page":"946","volume":"2020","author":"R Zhang","year":"2020","unstructured":"Zhang, R., Wei, J., Lu, W., Wang, L., Liu, M., Zhang, L., & Xu, J. (2020). ARET: Aggregated residual extended time-delay neural networks for speaker verification. 
In Proceedings Interspeech, 2020, 946\u2013950.","journal-title":"In Proceedings Interspeech"},{"key":"10059_CR61","first-page":"3573","volume":"2018","author":"Y Zhu","year":"2018","unstructured":"Zhu, Y., Ko, T., Snyder, D., Mak, B., & Povey, D. (2018). Self-attentive speaker embeddings for text-independent speaker verification. In Proceedings Interspeech, 2018, 3573\u20133577.","journal-title":"In Proceedings Interspeech"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10059-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-023-10059-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10059-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T10:12:35Z","timestamp":1704967955000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-023-10059-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,5]]},"references-count":61,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["10059"],"URL":"https:\/\/doi.org\/10.1007\/s10772-023-10059-4","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2023,11,5]]},"assertion":[{"value":"25 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 September 
2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 November 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}