{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T11:02:40Z","timestamp":1740135760453,"version":"3.37.3"},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,2,18]],"date-time":"2022-02-18T00:00:00Z","timestamp":1645142400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,2,18]],"date-time":"2022-02-18T00:00:00Z","timestamp":1645142400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61806078","62076094"],"award-info":[{"award-number":["61806078","62076094"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s00034-022-01964-1","type":"journal-article","created":{"date-parts":[[2022,2,18]],"date-time":"2022-02-18T05:02:42Z","timestamp":1645160562000},"page":"3931-3956","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["TRSD: A Time-Varying and Region-Changed Speech Database for Speaker Recognition"],"prefix":"10.1007","volume":"41","author":[{"given":"Dongdong","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinlin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3759-2041","authenticated-orcid":false,"given":"Zhe","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanqiong","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Baijun","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lizhi","family":"Cai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,2,18]]},"reference":[{"doi-asserted-by":"publisher","unstructured":"A. Abo Absa, M. Deriche, A two-stage hierarchical multilingual emotion recognition system using hidden Markov models and neural networks, in 2017 9th IEEE-GCC Conference and Exhibition (GCCCE), Manama, Bahrain (2017), pp. 1\u20136. https:\/\/doi.org\/10.1109\/IEEEGCC.2017.8448155","key":"1964_CR1","DOI":"10.1109\/IEEEGCC.2017.8448155"},{"issue":"8","key":"1964_CR2","doi-asserted-by":"publisher","first-page":"2581","DOI":"10.1007\/s00521-017-2848-4","volume":"30","author":"A Abumallouh","year":"2018","unstructured":"A. Abumallouh, Z. Qawaqneh, B. Barkana, New transformed features generated by deep bottleneck extractor and a GMM-UBM classifier for speaker age and gender classification. Neural Comput. Appl. 30(8), 2581\u20132593 (2018). https:\/\/doi.org\/10.1007\/s00521-017-2848-4","journal-title":"Neural Comput. Appl."},{"unstructured":"M. Ajili, J.F. Bonastre, S. Rossato, J. Kahn, G. Bernard, Fabiole, a speech database for forensic speaker comparison, in 10th Edition of Its Language Resources and Evaluation Conference (LREC 2016), Paris, France (2016)","key":"1964_CR3"},{"issue":"5","key":"1964_CR4","doi-asserted-by":"publisher","first-page":"3979","DOI":"10.1121\/1.4989083","volume":"141","author":"S Alcorn","year":"2020","unstructured":"S. Alcorn, K. Meemann, C. Clopper, R. Smiljanic, Acoustic cues and linguistic experience as factors in regional dialect classification. J. Acoust. Soc. Am. 141(5), 3979\u20133979 (2020). https:\/\/doi.org\/10.1121\/1.4989083","journal-title":"J. Acoust. Soc. Am."},{"key":"1964_CR5","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.specom.2013.07.010","volume":"56","author":"K Amino","year":"2014","unstructured":"K. Amino, T. Osanai, Native vs. non-native accent identification using Japanese spoken telephone numbers. Speech Commun. 56, 70\u201381 (2014)","journal-title":"Speech Commun."},{"doi-asserted-by":"publisher","unstructured":"H. Aronowitz, Inter dataset variability compensation for speaker recognition, in ICASSP 2014\u20142014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Florence, Italy (2014), pp. 4002\u20134006. https:\/\/doi.org\/10.1109\/ICASSP.2014.6854353","key":"1964_CR6","DOI":"10.1109\/ICASSP.2014.6854353"},{"key":"1964_CR7","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1016\/j.apacoust.2015.04.013","volume":"98","author":"B Barkana","year":"2015","unstructured":"B. Barkana, J. Zhou, A new pitch-range based feature set for a speaker\u2019s age and gender classification. Appl. Acoust. 98, 52\u201361 (2015). https:\/\/doi.org\/10.1016\/j.apacoust.2015.04.013","journal-title":"Appl. Acoust."},{"issue":"1","key":"1964_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000006","volume":"2","author":"Y Bengio","year":"2009","unstructured":"Y. Bengio, Learning deep architectures for ai. Foundations 2(1), 1\u2013127 (2009). https:\/\/doi.org\/10.1561\/2200000006","journal-title":"Foundations"},{"key":"1964_CR9","first-page":"341","volume":"5","author":"P Boersma","year":"2001","unstructured":"P. Boersma, D. Weenink, Praat, a system for doing phonetics by computer. Glot Int. 5, 341\u2013345 (2001)","journal-title":"Glot Int."},{"unstructured":"L. Brandschain, D. Graff, C. Cieri, K. Walker, C. Caruso, A. Neely, Greybeard longitudinal speech study, in International Conference on Language Resources and Evaluation, Valletta, Malta (2010)","key":"1964_CR10"},{"issue":"1","key":"1964_CR11","doi-asserted-by":"publisher","first-page":"218","DOI":"10.3745\/JIPS.04.0063","volume":"14","author":"H Chao","year":"2018","unstructured":"H. Chao, B.Y. Lu, Y.L. Liu, H.L. Zhi, Vocal effort detection based on spectral information entropy feature and model fusion. J. Inf. Process. Syst. 14(1), 218\u2013227 (2018). https:\/\/doi.org\/10.3745\/JIPS.04.0063","journal-title":"J. Inf. Process. Syst."},{"unstructured":"W. Chen, Y. Yang, First study on time-varying speaker recognition, in Phonetic Conference of China (2010), pp. 1\u20136","key":"1964_CR12"},{"unstructured":"X. Chen, Y. Peng, H. Song, Research on time-varying robustness in speaker recognition based on PLDA. Microcomput. Its Appl. (2016)","key":"1964_CR13"},{"unstructured":"R. Cole, M. Noel, V. Noel, The cslu speaker recognition corpus, in International Conference on Semiconductor Laser and Photonics (1999), pp. 3167\u20133170","key":"1964_CR14"},{"issue":"3","key":"1964_CR15","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1007\/s10772-017-9475-4","volume":"21","author":"RK Das","year":"2017","unstructured":"R.K. Das, S. Jelil, S.R.M. Prasanna, Multi-style speaker recognition database in practical conditions. Int. J. Speech Technol. 21(3), 409\u2013419 (2017). https:\/\/doi.org\/10.1007\/s10772-017-9475-4","journal-title":"Int. J. Speech Technol."},{"issue":"4","key":"1964_CR16","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"N. Dehak, P. Kenny, R. Dehak, P. Dumouchel, P. Ouellet, Front-end factor analysis for speaker verification. IEEE Trans. Audio Speech Lang. Process. 19(4), 788\u2013798 (2011). https:\/\/doi.org\/10.1109\/TASL.2010.2064307","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"7","key":"1964_CR17","doi-asserted-by":"publisher","first-page":"1975","DOI":"10.1109\/TASL.2011.2104955","volume":"19","author":"G Dobry","year":"2011","unstructured":"G. Dobry, R. Hecht, M. Avigal, Y. Zigel, Supervector dimension reduction for efficient speaker age estimation based on the acoustic speech signal. IEEE Trans. Audio Speech Lang. Process. 19(7), 1975\u20131985 (2011). https:\/\/doi.org\/10.1109\/TASL.2011.2104955","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"2C","key":"1964_CR18","first-page":"157","volume":"35","author":"G Droua-Hamdani","year":"2010","unstructured":"G. Droua-Hamdani, S.A. Selouani, M. Boudraa, Algerian Arabic speech database (ALGASD): corpus design and automatic speech recognition application. Arab. J. Sci. Eng. 35(2C), 157\u2013166 (2010)","journal-title":"Arab. J. Sci. Eng."},{"issue":"4","key":"1964_CR19","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1109\/TASLP.2017.2661705","volume":"25","author":"O Ghahabi","year":"2017","unstructured":"O. Ghahabi, J. Hernando, Deep learning for single and multi-session i-vector speaker recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 25(4), 807\u2013817 (2017). https:\/\/doi.org\/10.1109\/TASLP.2017.2661705","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"1","key":"1964_CR20","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1016\/j.csl.2012.01.003","volume":"27","author":"A Hanani","year":"2013","unstructured":"A. Hanani, M. Russell, M. Carey, Human and computer recognition of regional accents and ethnic groups from British English speech. Comput. Speech Lang. 27(1), 59\u201374 (2013). https:\/\/doi.org\/10.1016\/j.csl.2012.01.003","journal-title":"Comput. Speech Lang."},{"issue":"6","key":"1964_CR21","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1109\/MSP.2015.2462851","volume":"32","author":"J Hansen","year":"2015","unstructured":"J. Hansen, T. Hasan, Speaker recognition by machines and humans: a tutorial review. IEEE Signal Process. Mag. 32(6), 74\u201399 (2015). https:\/\/doi.org\/10.1109\/MSP.2015.2462851","journal-title":"IEEE Signal Process. Mag."},{"issue":"1","key":"1964_CR22","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1016\/j.jvoice.2006.07.004","volume":"22","author":"J Harnsberger","year":"2008","unstructured":"J. Harnsberger, R. Shrivastav, W. Brown, H. Rothman, H. Hollien, Speaking rate and fundamental frequency as speech cues to perceived age. J. Voice Off. J. Voice Found. 22(1), 58\u201369 (2008). https:\/\/doi.org\/10.1016\/j.jvoice.2006.07.004","journal-title":"J. Voice Off. J. Voice Found."},{"doi-asserted-by":"crossref","unstructured":"J. Harnsberger, R. Shrivastav, W. Jr, Modeling perceived vocal age in American English, in INTERSPEECH 2010, 11th Annual Conference of the International Speech Communication Association (2010), pp. 466\u2013469","key":"1964_CR23","DOI":"10.21437\/Interspeech.2010-199"},{"doi-asserted-by":"publisher","unstructured":"P. Har\u00e1r, R. Burget, M.K. Dutta, Speech emotion recognition with deep learning, in International Conference on Signal Processing and Integrated Networks, Noida, India (2017), pp. 137\u2013140. https:\/\/doi.org\/10.1109\/SPIN.2017.8049931","key":"1964_CR24","DOI":"10.1109\/SPIN.2017.8049931"},{"issue":"4","key":"1964_CR25","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1007\/s10772-012-9140-x","volume":"15","author":"BC Haris","year":"2012","unstructured":"B.C. Haris, G. Pradhan, A. Misra, S. Prasanna, R. Das, R. Sinha, Multivariability speaker recognition database in Indian scenario. Int. J. Speech Technol. 15(4), 441\u2013453 (2012). https:\/\/doi.org\/10.1007\/s10772-012-9140-x","journal-title":"Int. J. Speech Technol."},{"doi-asserted-by":"publisher","unstructured":"M. Hr\u00faz, Z. Zaj\u00edc, Convolutional neural network for speaker change detection in telephone speaker diarization system, in IEEE International Conference on Acoustics, New Orleans, LA, USA (2017), pp. 4945\u20134949. https:\/\/doi.org\/10.1109\/ICASSP.2017.7953097","key":"1964_CR26","DOI":"10.1109\/ICASSP.2017.7953097"},{"doi-asserted-by":"crossref","unstructured":"K. Jones, S. Strassel, K. Walker, D. Graff, J. Wright, Call my net corpus: a multilingual corpus for evaluation of speaker recognition technology, in Interspeech 2017 (2017), pp. 2621\u20132624","key":"1964_CR27","DOI":"10.21437\/Interspeech.2017-1521"},{"doi-asserted-by":"publisher","unstructured":"F. Kelly, A. Drygajlo, N. Harte, Speaker verification with long-term ageing data, in Proceedings\u20142012 5th IAPR International Conference on Biometrics, ICB 2012, New Delhi, India (2012), pp. 478\u2013483. https:\/\/doi.org\/10.1109\/ICB.2012.6199796","key":"1964_CR28","DOI":"10.1109\/ICB.2012.6199796"},{"issue":"5","key":"1964_CR29","doi-asserted-by":"publisher","first-page":"1068","DOI":"10.1016\/j.csl.2012.12.005","volume":"27","author":"F Kelly","year":"2013","unstructured":"F. Kelly, A. Drygajlo, N. Harte, Speaker verification in score-ageing-quality classification space. Comput. Speech Lang. 27(5), 1068\u20131084 (2013). https:\/\/doi.org\/10.1016\/j.csl.2012.12.005","journal-title":"Comput. Speech Lang."},{"key":"1964_CR30","doi-asserted-by":"publisher","first-page":"117327","DOI":"10.1109\/ACCESS.2019.2936124","volume":"7","author":"RA Khalil","year":"2019","unstructured":"R.A. Khalil, E. Jones, M. Babar, T. Jan, M. Zafar, T. Alhussain, Speech emotion recognition using deep learning techniques: a review. IEEE Access 7, 117327\u2013117345 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2936124","journal-title":"IEEE Access"},{"issue":"2","key":"1964_CR31","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1134\/S0005117919020097","volume":"80","author":"A Kolokolov","year":"2019","unstructured":"A. Kolokolov, I. Lyubinskii, Measuring the pitch of a speech signal using the autocorrelation function. Autom. Remote. Control 80(2), 317\u2013323 (2019). https:\/\/doi.org\/10.1134\/S0005117919020097","journal-title":"Autom. Remote. Control"},{"issue":"5","key":"1964_CR32","doi-asserted-by":"publisher","first-page":"1115","DOI":"10.1007\/s10772-019-09642-5","volume":"22","author":"A Krobba","year":"2019","unstructured":"A. Krobba, M. Debyeche, S.A. Selouani, Maximum entropy PLDA for robust speaker recognition under speech coding distortion. Int. J. Speech Technol. 22(5), 1115\u20131122 (2019). https:\/\/doi.org\/10.1007\/s10772-019-09642-5","journal-title":"Int. J. Speech Technol."},{"doi-asserted-by":"publisher","unstructured":"N. Kurpukdee, S. Kasuriya, V. Chunwijitra, C. Wutiwiwatchai, P. Lamsrichan, A study of support vector machines for emotional speech recognition, in International Conference of Information and Communication Technology for Embedded Systems, Chonburi, Thailand (2017), pp. 1\u20136. https:\/\/doi.org\/10.1109\/ICTEmSys.2017.7958773","key":"1964_CR33","DOI":"10.1109\/ICTEmSys.2017.7958773"},{"doi-asserted-by":"crossref","unstructured":"A. Lawson, A. Stauffer, E. Cupples, S. Wenndt, W. Bray, J. Grieco, The multi-session audio research project (MARP) corpus: goals, design and initial findings, in InterSpeech, Brighton, United Kingdom (2009), pp. 1811\u20131814","key":"1964_CR34","DOI":"10.21437\/Interspeech.2009-528"},{"doi-asserted-by":"crossref","unstructured":"A. Lazaridis, E. Khoury, J.P. Goldman, M. Avanzi, S. Marcel, P. Garner, Swiss French regional accent identification, in Odyssey: The Speaker and Language Recognition Workshop, Joensuu, Finland (2014)","key":"1964_CR35","DOI":"10.21437\/Odyssey.2014-17"},{"key":"1964_CR36","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y Lecun","year":"2015","unstructured":"Y. Lecun, Y. Bengio, G. Hinton, Deep learning. Nature 521, 436\u2013444 (2015). https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Nature"},{"doi-asserted-by":"crossref","unstructured":"K.A. Lee, A. Larcher, G. Wang, P. Kenny, N. Brummer, D. Van Leeuwen, H. Aronowitz, M. Kockmann, C. Vaquero, B. Ma, H. Li, T. Stafylakis, M.J. Alam, A. Swart, J. Perez, The reddots data collection for speaker recognition, in Interspeech, Dresden, Germany (2015), pp. 2996\u20133000","key":"1964_CR37","DOI":"10.21437\/Interspeech.2015-95"},{"doi-asserted-by":"publisher","unstructured":"D. Li, J. Wang, Y. Yang, PVD: a new pathological voice dataset for intra-speaker recognition research interest, in 2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP), Tianjin, China (2016), pp. 1\u20135. https:\/\/doi.org\/10.1109\/ISCSLP.2016.7918488","key":"1964_CR38","DOI":"10.1109\/ISCSLP.2016.7918488"},{"doi-asserted-by":"publisher","unstructured":"Y. Lukic, C. Vogt, O. Durr, T. Stadelmann, Speaker identification and clustering using convolutional neural networks, in IEEE International Workshop on Machine Learning for Signal Processing, Vietri sul Mare, Italy (2016), pp. 1\u20136. https:\/\/doi.org\/10.1109\/MLSP.2016.7738816","key":"1964_CR39","DOI":"10.1109\/MLSP.2016.7738816"},{"doi-asserted-by":"publisher","unstructured":"S. Mao, D. Tao, G. Zhang, P. Ching, T. Lee, Revisiting hidden Markov models for speech emotion recognition, in ICASSP 2019\u20142019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, UK (2019), pp. 6715\u20136719. https:\/\/doi.org\/10.1109\/ICASSP.2019.8683172","key":"1964_CR40","DOI":"10.1109\/ICASSP.2019.8683172"},{"doi-asserted-by":"publisher","unstructured":"M. McLaren, L. Ferrer, D. Cast\u00e1n Lavilla, A. Lawson, The speakers in the wild (sitw) speaker recognition database, in Interspeech 2016, San Francisco, CA, USA (2016), pp. 818\u2013822. https:\/\/doi.org\/10.21437\/Interspeech.2016-1129","key":"1964_CR41","DOI":"10.21437\/Interspeech.2016-1129"},{"issue":"6","key":"1964_CR42","first-page":"147","volume":"28","author":"YJ Miao","year":"2011","unstructured":"Y.J. Miao, X.F. Liu, X.M. Zhang, Compensation of speech enhancement distortion with combination of CMN and PMC. Microelectron. Comput. 28(6), 147\u2013160 (2011)","journal-title":"Microelectron. Comput."},{"key":"1964_CR43","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/j.csl.2019.06.004","volume":"58","author":"O Novotny","year":"2019","unstructured":"O. Novotny, O. Plchot, O. Glembek, J.H. Cernocky, L. Burget, Analysis of DNN speech signal enhancement for robust speaker recognition. Comput. Speech Lang. 58, 403\u2013421 (2019). https:\/\/doi.org\/10.1016\/j.csl.2019.06.004","journal-title":"Comput. Speech Lang."},{"key":"1964_CR44","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1016\/j.apacoust.2018.11.028","volume":"146","author":"T Ozseven","year":"2019","unstructured":"T. Ozseven, A novel feature selection method for speech emotion recognition. Appl. Acoust. 146, 320\u2013326 (2019). https:\/\/doi.org\/10.1016\/j.apacoust.2018.11.028","journal-title":"Appl. Acoust."},{"key":"1964_CR45","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/j.procs.2017.09.076","volume":"115","author":"S Paulose","year":"2017","unstructured":"S. Paulose, D. Mathew, A. Thomas, Performance evaluation of different modeling methods and classifiers with MFCC and IHC features for speaker recognition. Procedia Comput. Sci. 115, 55\u201362 (2017). https:\/\/doi.org\/10.1016\/j.procs.2017.09.076 (7th International Conference on Advances in Computing & Communications, ICACC-2017, 22\u201324 August 2017, Cochin, India)","journal-title":"Procedia Comput. Sci."},{"key":"1964_CR46","doi-asserted-by":"publisher","first-page":"16560","DOI":"10.1109\/ACCESS.2020.2967791","volume":"8","author":"Z Peng","year":"2020","unstructured":"Z. Peng, X. Li, Z. Zhu, M. Unoki, J. Dang, M. Akagi, Speech emotion recognition using 3d convolutions and attention-based sliding recurrent networks with auditory front-ends. IEEE Access 8, 16560\u201316572 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.2967791","journal-title":"IEEE Access"},{"doi-asserted-by":"publisher","unstructured":"X. Qin, H. Bu, M. Li, Hi-mia: a far-field text-dependent speaker verification database and the baselines, in ICASSP 2020\u20142020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Barcelona, Spain (2020), pp. 7609\u20137613. https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054423","key":"1964_CR47","DOI":"10.1109\/ICASSP40776.2020.9054423"},{"issue":"5500","key":"1964_CR48","doi-asserted-by":"publisher","first-page":"2323","DOI":"10.1126\/science.290.5500.2323","volume":"290","author":"S Roweis","year":"2001","unstructured":"S. Roweis, L. Saul, Nonlinear dimensionality reduction by locally linear embedding. Science 290(5500), 2323\u20132326 (2001). https:\/\/doi.org\/10.1126\/science.290.5500.2323","journal-title":"Science"},{"issue":"1","key":"1964_CR49","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/TASLP.2015.2493366","volume":"24","author":"R Saeidi","year":"2016","unstructured":"R. Saeidi, P. Alku, T. Backstrom, Feature extraction using power-law adjusted linear prediction with application to speaker recognition under severe vocal effort mismatch. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(1), 42\u201353 (2016). https:\/\/doi.org\/10.1109\/TASLP.2015.2493366","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"unstructured":"S. Sch\u00f6tz, Perception Analysis and Synthesis of Speaker Age (Department of Linguistics and Phonetics, Centre for Languages and Literature, 2006)","key":"1964_CR50"},{"doi-asserted-by":"crossref","unstructured":"D. Snyder, D. Garcia-Romero, D. Povey, S. Khudanpur, Deep neural network embeddings for text-independent speaker verification, in Interspeech (2017), pp. 999\u20131003","key":"1964_CR51","DOI":"10.21437\/Interspeech.2017-620"},{"doi-asserted-by":"publisher","unstructured":"I. Tashev, Z.Q. Wang, K. Godin, Speech emotion recognition based on gaussian mixture models and deep neural networks, in Information Theory and Applications Workshop, San Diego, CA, USA (2017), pp. 1\u20134. https:\/\/doi.org\/10.1109\/ITA.2017.8023477","key":"1964_CR52","DOI":"10.1109\/ITA.2017.8023477"},{"issue":"1","key":"1964_CR53","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1109\/TAFFC.2015.2392101","volume":"6","author":"K Wang","year":"2015","unstructured":"K. Wang, N. An, B.N. Li, Y. Zhang, L. Li, Speech emotion recognition using Fourier parameters. IEEE Trans. Affect. Comput. 6(1), 69\u201375 (2015). https:\/\/doi.org\/10.1109\/TAFFC.2015.2392101","journal-title":"IEEE Trans. Affect. Comput."},{"doi-asserted-by":"publisher","unstructured":"L. Wang, N. Kitaoka, S. Nakagawa, Analysis of effect of compensation parameter estimation for cmn on speech\/speaker recognition, in International Symposium on Signal Processing and Its Applications, Sharjah, United Arab Emirates (2007), pp. 1\u20134. https:\/\/doi.org\/10.1109\/ISSPA.2007.4555505","key":"1964_CR54","DOI":"10.1109\/ISSPA.2007.4555505"},{"unstructured":"L. Wang, X. Wu, F. Zheng, C. Zhang, An investigation into better frequency warping for time-varying speaker recognition, in Asia-Pacific Signal and Information Processing Association Summit and Conference, Hollywood, CA, USA (2012), pp. 1\u20134","key":"1964_CR55"},{"unstructured":"L. Wang, F. Zheng, Creation of time-varying voiceprint database. Oriental-COCOSDA (2010)","key":"1964_CR56"},{"unstructured":"L. Wang, T. Zheng, C. Zhang, G. Wang, Discrimination-emphasized mel-frequency-warping for time-varying speaker recognition, in APSIPA ASC 2011\u2014Asia-Pacific Signal and Information Processing Association Annual Summit and Conference 2011 (2011), pp. 731\u2013734","key":"1964_CR57"},{"doi-asserted-by":"crossref","unstructured":"Z. Wu, P. Swietojanski, C. Veaux, S. Renals, A study of speaker adaptation for dnn-based speech synthesis, in INTERSPEECH, Dresden, Germany (2015), pp. 879\u2013883","key":"1964_CR58","DOI":"10.21437\/Interspeech.2015-270"},{"doi-asserted-by":"publisher","unstructured":"W.S. Yang, X. Wang, S. Zhou, H.x. Zhao, J. Huang, An improved method for voiceprint recognition, in Complex, Intelligent, and Software Intensive Systems\u2014Proceedings of the 12th International Conference on Complex, Intelligent, and Software Intensive Systems, CISIS-2018, Matsue, Japan, 4\u20136 July 2018, Advances in Intelligent Systems and Computing, eds. by L. Barolli, N. Javaid, M. Ikeda, M. Takizawa, vol. 772 (Springer, Berlin, 2018), pp. 735\u2013746. https:\/\/doi.org\/10.1007\/978-3-319-93659-8_67","key":"1964_CR59","DOI":"10.1007\/978-3-319-93659-8_67"},{"key":"1964_CR60","doi-asserted-by":"publisher","first-page":"23496","DOI":"10.1109\/ACCESS.2020.2969032","volume":"8","author":"S Zhang","year":"2020","unstructured":"S. Zhang, A. Chen, W. Guo, Y. Cui, X. Zhao, L. Liu, Learning deep binaural representations with deep convolutional neural networks for spontaneous speech emotion recognition. IEEE Access 8, 23496\u201323505 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.2969032","journal-title":"IEEE Access"},{"issue":"8","key":"1964_CR61","doi-asserted-by":"publisher","first-page":"1127","DOI":"10.1002\/spe.2487","volume":"47","author":"W Zhang","year":"2017","unstructured":"W. Zhang, D. Zhao, Z. Chai, L. Yang, X. Liu, F. Gong, S. Yang, Deep learning and SVM-based emotion recognition from Chinese speech for smart affective services. Softw. Pract. Exp. 47(8), 1127\u20131138 (2017). https:\/\/doi.org\/10.1002\/spe.2487","journal-title":"Softw. Pract. Exp."},{"doi-asserted-by":"publisher","unstructured":"Y. Zhang, J. Du, Z. Wang, J. Zhang, t. Yanhui, Attention based fully convolutional network for speech emotion recognition, in 2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), Honolulu, HI, USA (2019), pp. 1771\u20131775. https:\/\/doi.org\/10.23919\/APSIPA.2018.8659587","key":"1964_CR62","DOI":"10.23919\/APSIPA.2018.8659587"},{"doi-asserted-by":"publisher","unstructured":"F. Zheng, Q. Jin, L. Li, J. Wang, F. Bie, An overview of robustness related issues in speaker recognition, in Asia-Pacific Signal and Information Processing Association, Summit and Conference, Chiang Mai, Thailand (2014), pp. 1\u201310. https:\/\/doi.org\/10.1109\/APSIPA.2014.7041826","key":"1964_CR63","DOI":"10.1109\/APSIPA.2014.7041826"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-01964-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-022-01964-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-01964-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T09:04:39Z","timestamp":1674810279000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-022-01964-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,18]]},"references-count":63,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["1964"],"URL":"https:\/\/doi.org\/10.1007\/s00034-022-01964-1","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2022,2,18]]},"assertion":[{"value":"8 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 January 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 February 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The manuscript has been approved by all authors for publication, and no conflict of interest exits in the submission of it.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}