{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T19:49:54Z","timestamp":1771703394731,"version":"3.50.1"},"reference-count":70,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s00034-023-02562-5","type":"journal-article","created":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T14:01:48Z","timestamp":1702735308000},"page":"2341-2384","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":29,"title":["Speech Emotion Recognition Using Generative Adversarial Network and Deep Convolutional Neural Network"],"prefix":"10.1007","volume":"43","author":[{"given":"Kishor","family":"Bhangale","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3938-7495","authenticated-orcid":false,"given":"Mohanaprasad","family":"Kothandaraman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,16]]},"reference":[{"issue":"11","key":"2562_CR1","doi-asserted-by":"publisher","first-page":"5632","DOI":"10.1007\/s00034-021-01737-2","volume":"40","author":"A Abrol","year":"2021","unstructured":"A. Abrol, N. Kapoor, P.K. Lehana, Fractal-based speech analysis for emotional content estimation. Circuits Syst. 
Signal Process. 40(11), 5632\u20135653 (2021). https:\/\/doi.org\/10.1007\/s00034-021-01737-2","journal-title":"Circuits Syst. Signal Process."},{"issue":"2","key":"2562_CR2","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1016\/j.specom.2012.08.007","volume":"55","author":"MJ Alam","year":"2013","unstructured":"M.J. Alam, T. Kinnunen, P. Kenny, P. Ouellet, D. O\u2019Shaughnessy, Multitaper MFCC and PLP features for speaker verification using i-vectors. Speech Commun. 55(2), 237\u2013251 (2013). https:\/\/doi.org\/10.1016\/j.specom.2012.08.007","journal-title":"Speech Commun."},{"issue":"11","key":"2562_CR3","doi-asserted-by":"publisher","first-page":"5681","DOI":"10.1007\/s00034-020-01429-3","volume":"39","author":"SB Alex","year":"2020","unstructured":"S.B. Alex, L. Mary, B.P. Babu, Attention and feature selection for automatic speech emotion recognition using utterance and syllable-level prosodic features. Circuits Syst. Signal Process. 39(11), 5681\u20135709 (2020). https:\/\/doi.org\/10.1007\/s00034-020-01429-3","journal-title":"Circuits Syst. Signal Process."},{"issue":"2","key":"2562_CR4","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s10462-012-9368-5","volume":"43","author":"CN Anagnostopoulos","year":"2012","unstructured":"C.N. Anagnostopoulos, T. Iliou, I. Giannoukos, Features and classifiers for emotion recognition from speech: a survey from 2000 to 2011. Artif. Intell. Rev. 43(2), 155\u2013177 (2012). https:\/\/doi.org\/10.1007\/s10462-012-9368-5","journal-title":"Artif. Intell. Rev."},{"key":"2562_CR5","doi-asserted-by":"publisher","unstructured":"A.M. Badshah, J. Ahmad, N. Rahim, S.W. Baik, Speech emotion recognition from spectrograms with deep convolutional neural network, in 2017 International Conference on Platform Technology and Service (PlatCon) (IEEE, 2017), pp. 1\u20135. 
https:\/\/doi.org\/10.1109\/PlatCon.2017.7883728","DOI":"10.1109\/PlatCon.2017.7883728"},{"key":"2562_CR6","doi-asserted-by":"publisher","unstructured":"K.B. Bhangale, K. Mohanaprasad, Speech emotion recognition using Mel frequency log spectrogram and deep convolutional neural network, in Futuristic Communication and Network Technologies (2022), pp. 241\u2013250. https:\/\/doi.org\/10.1007\/978-981-16-4625-6_24","DOI":"10.1007\/978-981-16-4625-6_24"},{"issue":"2","key":"2562_CR7","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1007\/s10772-021-09808-0","volume":"24","author":"KB Bhangale","year":"2021","unstructured":"K.B. Bhangale, K. Mohanaprasad, A review on speech processing using machine learning paradigm. Int. J. Speech Technol. 24(2), 367\u2013388 (2021). https:\/\/doi.org\/10.1007\/s10772-021-09808-0","journal-title":"Int. J. Speech Technol."},{"key":"2562_CR8","doi-asserted-by":"publisher","first-page":"109613","DOI":"10.1016\/j.apacoust.2023.109613","volume":"212","author":"KB Bhangale","year":"2023","unstructured":"K.B. Bhangale, K. Mohanaprasad, Speech emotion recognition using the novel PEmoNet (Parallel Emotion Network). Appl. Acoust. 212, 109613 (2023). https:\/\/doi.org\/10.1016\/j.apacoust.2023.109613","journal-title":"Appl. Acoust."},{"key":"2562_CR9","doi-asserted-by":"crossref","unstructured":"F. Burkhardt, A. Paeschke, M. Rolfes, W.F. Sendlmeier, B. Weiss, A database of German emotional speech, in 9th European Conference on Speech Communication and Technology, vol. 5 (2005), pp. 1517\u20131520","DOI":"10.21437\/Interspeech.2005-446"},{"key":"2562_CR10","doi-asserted-by":"publisher","unstructured":"A. Chatziagapi, G. Paraskevopoulos, D. Sgouropoulos, G. Pantazopoulos, M. Nikandrou, T. Giannakopoulos, A. Katsamanis, A. Potamianos, S. Narayanan, Data augmentation using GANs for speech emotion recognition, in Interspeech (2019), pp. 171\u2013175. 
https:\/\/doi.org\/10.21437\/Interspeech.2019-2561","DOI":"10.21437\/Interspeech.2019-2561"},{"issue":"1","key":"2562_CR11","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"A. Creswell, T. White, V. Dumoulin, K. Arulkumaran, B. Sengupta, A.A. Bharath, Generative adversarial networks: an overview. IEEE Signal Process. Mag. 35(1), 53\u201365 (2018). https:\/\/doi.org\/10.1109\/MSP.2017.2765202","journal-title":"IEEE Signal Process. Mag."},{"key":"2562_CR12","doi-asserted-by":"publisher","unstructured":"F. Dellaert, T. Polzin, A. Waibel, Recognizing emotion in speech, in Fourth International Conference on Spoken Language Processing (1996). https:\/\/doi.org\/10.1109\/ICSLP.1996.608022","DOI":"10.1109\/ICSLP.1996.608022"},{"issue":"3\u20134","key":"2562_CR13","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1561\/2000000039","volume":"7","author":"L Deng","year":"2014","unstructured":"L. Deng, D. Yu et al., Deep learning: methods and applications. Found. Trends Signal Process. 7(3\u20134), 197\u2013387 (2014). https:\/\/doi.org\/10.1561\/2000000039","journal-title":"Found. Trends Signal Process."},{"key":"2562_CR14","doi-asserted-by":"publisher","unstructured":"J. Deng, Z. Zhang, E. Marchi, B. Schuller, Sparse autoencoder-based feature transfer learning for speech emotion recognition, in Humaine Association Conference on Affective Computing and Intelligent Interaction (Geneva, 2013) pp. 511\u2013516. https:\/\/doi.org\/10.1109\/ACII.2013.90","DOI":"10.1109\/ACII.2013.90"},{"key":"2562_CR15","doi-asserted-by":"publisher","unstructured":"H. Dinkel, N. Chen, Y. Qian, K. Yu, End-to-end spoofing detection with raw waveform CLDNNS, in 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2017), pp. 4860\u20134864. 
https:\/\/doi.org\/10.1109\/ICASSP.2017.7953080","DOI":"10.1109\/ICASSP.2017.7953080"},{"issue":"3","key":"2562_CR16","doi-asserted-by":"publisher","first-page":"592","DOI":"10.3390\/s20030592","volume":"20","author":"A Dzedzickis","year":"2020","unstructured":"A. Dzedzickis, A. Kaklauskas, V. Bucinskas, Human emotion recognition: review of sensors and methods. Sensors 20(3), 592 (2020). https:\/\/doi.org\/10.3390\/s20030592","journal-title":"Sensors"},{"key":"2562_CR17","doi-asserted-by":"publisher","unstructured":"S.E. Eskimez, D. Dimitriadis, R. Gmyr, K. Kumatani, GAN-based data generation for speech emotion recognition, in INTERSPEECH (2020), pp. 3446\u20133450. https:\/\/doi.org\/10.21437\/Interspeech.2020-2898","DOI":"10.21437\/Interspeech.2020-2898"},{"issue":"1","key":"2562_CR18","doi-asserted-by":"publisher","first-page":"6113","DOI":"10.1007\/s00034-022-02068-6","volume":"41","author":"MS Fahad","year":"2022","unstructured":"M.S. Fahad, A. Ranjan, A. Deepak, G. Pradhan, Speaker Adversarial Neural Network (SANN) for Speaker-independent speech emotion recognition. Circuits Syst. Signal Process. 41(1), 6113\u20136135 (2022). https:\/\/doi.org\/10.1007\/s00034-022-02068-6","journal-title":"Circuits Syst. Signal Process."},{"issue":"1","key":"2562_CR19","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/s00034-022-02130-3","volume":"42","author":"MR Falahzadeh","year":"2023","unstructured":"M.R. Falahzadeh, F. Farokhi, A. Harimi, R. Sabbaghi-Nadooshan, Deep convolutional neural network and Gray wolf optimization algorithm for speech emotion recognition. Circuits Syst. Signal Process. 42(1), 449\u2013492 (2023). https:\/\/doi.org\/10.1007\/s00034-022-02130-3","journal-title":"Circuits Syst. Signal Process."},{"key":"2562_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/s00034-023-02315-4","author":"MR Falahzadeh","year":"2023","unstructured":"M.R. Falahzadeh, F. Farokhi, A. Harimi, R. 
Sabbaghi-Nadooshan, A 3D tensor representation of speech and 3D convolutional neural network for emotion recognition. Circuits Syst. Signal Process. (2023). https:\/\/doi.org\/10.1007\/s00034-023-02315-4","journal-title":"Circuits Syst. Signal Process."},{"key":"2562_CR21","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neunet.2017.02.013","volume":"92","author":"HM Fayek","year":"2017","unstructured":"H.M. Fayek, M. Lech, L. Cavedon, Evaluating deep learning architectures for speech emotion recognition. Neural Netw. 92, 60\u201368 (2017). https:\/\/doi.org\/10.1016\/j.neunet.2017.02.013","journal-title":"Neural Netw."},{"key":"2562_CR22","doi-asserted-by":"publisher","unstructured":"A. Ghosh, A. Sufian, F. Sultana, A. Chakrabarti, D. De, Fundamental concepts of convolutional neural network, in Recent Trends and Advances in Artificial Intelligence and Internet of Things (Springer, Cham, 2020), pp. 519\u2013567. https:\/\/doi.org\/10.1007\/978-3-030-32644-9_36","DOI":"10.1007\/978-3-030-32644-9_36"},{"key":"2562_CR23","unstructured":"I. Goodfellow, J. Pouget-Abadie, M. Mirza, B. Xu, D. Warde-Farley, S. Ozair, A. Courville, Y. Bengio, Generative adversarial nets, in Advances in Neural Information Processing Systems (2014), pp. 2672\u20132680."},{"issue":"11","key":"2562_CR24","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"I. Goodfellow, J. Pouget-Abadie, M. Mirza, B. Xu, D. Warde-Farley, S. Ozair, A. Courville, Y. Bengio, Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"2562_CR25","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.bspc.2014.10.011","volume":"17","author":"A Guidi","year":"2015","unstructured":"A. Guidi, N. Vanello, G. Bertschy, C. Gentili, L. Landini, E.P. Scilingo, Automatic analysis of speech F0 contour for the characterization of mood changes in bipolar patients. 
Biomed. Signal Process. Control 17, 29\u201337 (2015). https:\/\/doi.org\/10.1016\/j.bspc.2014.10.011","journal-title":"Biomed. Signal Process. Control"},{"key":"2562_CR26","doi-asserted-by":"publisher","unstructured":"T. Guo, J. Dong, H. Li, Y. Gao, Simple convolutional neural network on image classification, in 2017 IEEE 2nd International Conference on Big Data Analysis (ICBDA) (IEEE, 2017), pp. 721\u2013724. https:\/\/doi.org\/10.1109\/ICBDA.2017.8078730","DOI":"10.1109\/ICBDA.2017.8078730"},{"key":"2562_CR27","doi-asserted-by":"publisher","unstructured":"J. Guo, K. Kumatani, M. Sun, M. Wu, A. Raju, N. Str\u00f6m, A. Mandal, Time-delayed bottleneck highway networks using a DFT feature for keyword spotting, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2018), pp. 5489\u20135493. https:\/\/doi.org\/10.1109\/ICASSP.2018.8462166","DOI":"10.1109\/ICASSP.2018.8462166"},{"key":"2562_CR28","doi-asserted-by":"crossref","unstructured":"K. Han, D. Yu, I. Tashev, Speech emotion recognition using deep neural network and extreme learning machine, in Proceedings of the INTERSPEECH 2014 (Singapore, 2014), pp. 223\u2013227","DOI":"10.21437\/Interspeech.2014-57"},{"key":"2562_CR29","doi-asserted-by":"publisher","unstructured":"J. Han, Z. Zhang, F. Ringeval, B. Schuller, Reconstruction-error based learning for continuous emotion recognition in speech, in Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2017), pp. 2367\u20132371. https:\/\/doi.org\/10.1109\/ICASSP.2017.7952580","DOI":"10.1109\/ICASSP.2017.7952580"},{"key":"2562_CR30","doi-asserted-by":"publisher","unstructured":"M. Hansson-Sandsten, J. Sandberg, Optimal cepstrum estimation using multiple windows, in Proceedings of the ICASSP (2009), pp. 3077\u20133080. https:\/\/doi.org\/10.1109\/ICASSP.2009.4960274","DOI":"10.1109\/ICASSP.2009.4960274"},{"key":"2562_CR31","unstructured":"S. Haq, P.J. Jackson, J. 
Edge, Audio\u2013visual feature selection and reduction for emotion classification, in Proceedings of the International Conference on Auditory\u2013Visual Speech Processing (AVSP\u201908) (Tangalooma, Australia, 2008)."},{"key":"2562_CR32","doi-asserted-by":"publisher","unstructured":"Z. Huang, M. Dong, Q. Mao, Y. Zhan, Speech emotion recognition using CNN, in Proceedings of the 22nd ACM International Conference on Multimedia (2014), pp. 801\u2013804. https:\/\/doi.org\/10.1145\/2647868.2654984","DOI":"10.1145\/2647868.2654984"},{"key":"2562_CR33","doi-asserted-by":"publisher","first-page":"101894","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"D. Issa, M.F. Demirci, A. Yazici, Speech emotion recognition with deep convolutional neural networks. Biomed. Signal Process. Control 59, 101894 (2020). https:\/\/doi.org\/10.1016\/j.bspc.2020.101894","journal-title":"Biomed. Signal Process. Control"},{"key":"2562_CR34","doi-asserted-by":"publisher","unstructured":"D. Ito, T. Okamoto, S. Koakutsu, A learning algorithm with a gradient normalization and a learning rate adaptation for the mini-batch type learning, in 56th Annual Conference of the Society of Instrument and Control Engineers of Japan (SICE) (Kanazawa, 2017), pp. 811\u2013816. https:\/\/doi.org\/10.23919\/SICE.2017.8105654","DOI":"10.23919\/SICE.2017.8105654"},{"issue":"9","key":"2562_CR35","doi-asserted-by":"publisher","first-page":"4459","DOI":"10.1007\/s00034-020-01377-y","volume":"39","author":"SR Kadiri","year":"2020","unstructured":"S.R. Kadiri, P. Gangamohan, S.V. Gangashetty, P. Alku, B. Yegnanarayana, Excitation features of speech for emotion recognition using neutral speech as reference. Circuits Syst. Signal Process. 39(9), 4459\u20134481 (2020). https:\/\/doi.org\/10.1007\/s00034-020-01377-y","journal-title":"Circuits Syst. Signal Process."},{"key":"2562_CR36","doi-asserted-by":"publisher","unstructured":"P. 
Kim, Convolutional neural network, in MATLAB Deep Learning (Apress, Berkeley, 2017), pp. 121\u2013147. https:\/\/doi.org\/10.1007\/978-1-4842-2845-6_6","DOI":"10.1007\/978-1-4842-2845-6_6"},{"key":"2562_CR37","doi-asserted-by":"crossref","unstructured":"T. Kinnunen, R. Saeidi, J. Sandberg, M. Hansson-Sandsten, What else is new than the Hamming window? Robust MFCCs for speaker recognition via multitapering, in Eleventh Annual Conference of the International Speech Communication Association (2010)","DOI":"10.21437\/Interspeech.2010-724"},{"key":"2562_CR38","doi-asserted-by":"publisher","unstructured":"L. Li, Y. Zhao, D. Jiang, Y. Zhang, F. Wang, I. Gonzalez, E. Valentin, H. Sahli. Hybrid deep neural network--hidden Markov Model (DNN-HMM) based speech emotion recognition, in 2013 Humaine Association Conference on Affective Computing and Intelligent Interaction (IEEE, 2013), pp. 312\u2013317. https:\/\/doi.org\/10.1109\/ACII.2013.58","DOI":"10.1109\/ACII.2013.58"},{"key":"2562_CR39","doi-asserted-by":"publisher","unstructured":"W. Lim, D.J. Lim, T. Lee, Speech emotion recognition using convolutional and recurrent neural networks, in 2016 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA) (IEEE, 2016), pp. 1\u20134. https:\/\/doi.org\/10.1109\/APSIPA.2016.7820699","DOI":"10.1109\/APSIPA.2016.7820699"},{"key":"2562_CR40","doi-asserted-by":"publisher","first-page":"e0196391","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"S.R. Livingstone, F.A. Russo, The Ryerson audio\u2013visual database of emotional speech and song (RAVDESS): a dynamic, multimodal set of facial and vocal expressions in North American English. PLoS ONE 13, e0196391 (2018)","journal-title":"PLoS ONE"},{"key":"2562_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2022.106646","author":"M Maithri","year":"2022","unstructured":"M. Maithri, U. Raghavendra, A. Gudigar, J. Samanth, P.D. Barua, M. 
Murugappan, Y. Chakole, U.R. Acharya, Automated emotion recognition: current trends and future perspectives. Comput. Methods Programs Biomed. (2022). https:\/\/doi.org\/10.1016\/j.cmpb.2022.106646","journal-title":"Comput. Methods Programs Biomed."},{"key":"2562_CR42","doi-asserted-by":"publisher","unstructured":"W. Minhua, K. Kumatani, S. Sundaram, N. Str\u00f6m, B. Hoffmeister, Frequency domain multi-channel acoustic modeling for distant speech recognition, in 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2019), pp. 6640\u20136644. https:\/\/doi.org\/10.1109\/ICASSP.2019.8682977","DOI":"10.1109\/ICASSP.2019.8682977"},{"key":"2562_CR43","doi-asserted-by":"publisher","unstructured":"S. Mirsamadi, E. Barsoum, C. Zhang, Automatic speech emotion recognition using recurrent neural networks with local attention, in Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2017), pp. 2227\u20132231. https:\/\/doi.org\/10.1109\/ICASSP.2017.7952552","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"2562_CR44","doi-asserted-by":"publisher","unstructured":"E. Mower, A. Metallinou, C.C. Lee, A. Kazemzadeh, C. Busso, S. Lee, S. Narayanan, Interpreting ambiguous emotional expressions, in 2009 3rd International Conference on Affective Computing and Intelligent Interaction and Workshops (IEEE, 2009), pp. 1\u20138. https:\/\/doi.org\/10.1109\/ACII.2009.5349500","DOI":"10.1109\/ACII.2009.5349500"},{"key":"2562_CR45","doi-asserted-by":"publisher","unstructured":"J. Niu, Y. Qian, K. Yu, Acoustic emotion recognition using deep neural network, in The 9th International Symposium on Chinese Spoken Language Processing (IEEE, 2014), pp. 128\u2013132. https:\/\/doi.org\/10.1109\/ISCSLP.2014.6936657","DOI":"10.1109\/ISCSLP.2014.6936657"},{"key":"2562_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.specom.2019.08.006","volume":"114","author":"Y Qian","year":"2019","unstructured":"Y. Qian, H. Hu, T. 
Tan, Data augmentation using generative adversarial networks for robust speech recognition. Speech Commun. 114, 1\u20139 (2019). https:\/\/doi.org\/10.1016\/j.specom.2019.08.006","journal-title":"Speech Commun."},{"issue":"1","key":"2562_CR47","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1109\/78.365298","volume":"43","author":"KS Riedel","year":"1995","unstructured":"K.S. Riedel, A. Sidorenko, Minimum bias multiple taper spectral estimation. IEEE Trans. Signal Proc. 43(1), 188\u2013195 (1995). https:\/\/doi.org\/10.1109\/78.365298","journal-title":"IEEE Trans. Signal Proc."},{"key":"2562_CR48","doi-asserted-by":"publisher","unstructured":"S. Sahu, R. Gupta, G. Sivaraman, W. AbdAlmageed, C. EspyWilson, Adversarial auto-encoders for speech based emotion recognition, in Proceedings of the Interspeech (2017), pp. 1243\u20131247. https:\/\/doi.org\/10.48550\/arXiv.1806.02146","DOI":"10.48550\/arXiv.1806.02146"},{"issue":"4","key":"2562_CR49","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1109\/LSP.2010.2040228","volume":"17","author":"J Sandberg","year":"2010","unstructured":"J. Sandberg, M. Hansson-Sandsten, T. Kinnunen, R. Saeidi, P. Flandrin, P. Borgnat, Multi-taper estimation of frequency-warped cepstra with application to speaker verification. IEEE Signal Process. Lett. 17(4), 343\u2013346 (2010). https:\/\/doi.org\/10.1109\/LSP.2010.2040228","journal-title":"IEEE Signal Process. Lett."},{"issue":"1","key":"2562_CR50","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41598-019-52737-x","volume":"9","author":"V Sandfort","year":"2019","unstructured":"V. Sandfort, K. Yan, P.J. Pickhardt, R.M. Summers, Data augmentation using generative adversarial networks (CycleGAN) to improve generalizability in CT segmentation tasks. Sci. Rep. 9(1), 1\u20139 (2019). https:\/\/doi.org\/10.1038\/s41598-019-52737-x","journal-title":"Sci. 
Rep."},{"issue":"1\/2","key":"2562_CR51","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/S0167-6393(02)00084-5","volume":"40","author":"K Scherer","year":"2003","unstructured":"K. Scherer, Vocal communication of emotion: a review of research paradigms. Speech Commun. 40(1\/2), 227\u2013256 (2003). https:\/\/doi.org\/10.1016\/S0167-6393(02)00084-5","journal-title":"Speech Commun."},{"key":"2562_CR52","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"J. Schmidhuber, Deep learning in neural networks: an overview. Neural Netw. 61, 85\u2013117 (2015). https:\/\/doi.org\/10.1016\/j.neunet.2014.09.003","journal-title":"Neural Netw."},{"key":"2562_CR53","doi-asserted-by":"publisher","unstructured":"E.M. Schmidt, Y.E. Kim, Learning emotion-based acoustic features with deep belief networks, in 2011 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), New Paltz, NY (2011), pp. 65\u201368. https:\/\/doi.org\/10.1109\/ASPAA.2011.6082328","DOI":"10.1109\/ASPAA.2011.6082328"},{"issue":"2","key":"2562_CR54","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1109\/T-AFFC.2010.8","volume":"1","author":"B Schuller","year":"2010","unstructured":"B. Schuller et al., Cross-corpus acoustic emotion recognition: variances and strategies. IEEE Trans. Affect. Comput. 1(2), 119\u2013131 (2010). https:\/\/doi.org\/10.1109\/T-AFFC.2010.8","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"5","key":"2562_CR55","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1145\/3129340","volume":"61","author":"BW Schuller","year":"2018","unstructured":"B.W. Schuller, Speech emotion recognition: two decades in a nutshell, benchmarks, and ongoing trends. Commun. ACM 61(5), 90\u201399 (2018). https:\/\/doi.org\/10.1145\/3129340","journal-title":"Commun. ACM"},{"key":"2562_CR56","doi-asserted-by":"crossref","unstructured":"B. Schuller, R. 
M\u00fcller, M. Lang, G. Rigoll, Speaker independent emotion recognition by early fusion of acoustic and linguistic features within ensembles, in Proceedings of the 9th European Conference on Speech Communication and Technology (2005), pp. 805\u2013808","DOI":"10.21437\/Interspeech.2005-379"},{"key":"2562_CR57","doi-asserted-by":"publisher","unstructured":"A. Sonawane, M.U. Inamdar, K.B. Bhangale, Sound-based human emotion recognition using MFCC & multiple SVM, in 2017 International Conference on Information, Communication, Instrumentation and Control (ICICIC) (IEEE, 2017), pp. 1\u20134. https:\/\/doi.org\/10.1109\/ICOMICON.2017.8279046","DOI":"10.1109\/ICOMICON.2017.8279046"},{"key":"2562_CR58","doi-asserted-by":"publisher","unstructured":"K. Sridhar, S. Parthasarathy, C. Busso, Role of regularization in the prediction of valence from speech, in Proceedings of the Interspeech (Hyderabad, 2018), pp. 941\u2013945. https:\/\/doi.org\/10.21437\/Interspeech.2018-2508","DOI":"10.21437\/Interspeech.2018-2508"},{"key":"2562_CR59","doi-asserted-by":"publisher","unstructured":"A. Stuhlsatz, C. Meyer, F. Eyben, T. Zielke, G. Meier, B. Schuller, Deep neural networks for acoustic emotion recognition: raising the benchmarks, in 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2011), pp. 5688\u20135691. https:\/\/doi.org\/10.1109\/ICASSP.2011.5947651","DOI":"10.1109\/ICASSP.2011.5947651"},{"key":"2562_CR60","doi-asserted-by":"publisher","unstructured":"B.H. Su, C.C. Lee, A conditional cycle emotion Gan for cross corpus speech emotion recognition, in 2021 IEEE Spoken Language Technology Workshop (SLT) (IEEE, 2021), pp. 351\u2013357. https:\/\/doi.org\/10.1109\/SLT48900.2021.9383512","DOI":"10.1109\/SLT48900.2021.9383512"},{"issue":"1","key":"2562_CR61","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/s10772-018-9491-z","volume":"21","author":"M Swain","year":"2018","unstructured":"M. Swain, A. Routray, P. 
Kabisatpathy, Databases, features, and classifiers for speech emotion recognition: a review. Int. J. Speech Technol. 21(1), 93\u2013120 (2018). https:\/\/doi.org\/10.1007\/s10772-018-9491-z","journal-title":"Int. J. Speech Technol."},{"key":"2562_CR62","doi-asserted-by":"publisher","first-page":"108812","DOI":"10.1016\/j.sigpro.2022.108812","volume":"204","author":"P Wen","year":"2023","unstructured":"P. Wen, B. Wang, S. Zhang, B. Qu, X. Song, J. Sun, X. Mu, Bias-compensated augmented complex-valued NSAF algorithm and its low-complexity implementation. Signal Process. 204, 108812 (2023). https:\/\/doi.org\/10.1016\/j.sigpro.2022.108812","journal-title":"Signal Process."},{"key":"2562_CR63","doi-asserted-by":"publisher","unstructured":"L. Yi, M.W. Mak, Adversarial data augmentation network for speech emotion recognition, in 2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC) (IEEE, 2019), pp. 529\u2013534. https:\/\/doi.org\/10.1109\/APSIPAASC47483.2019.9023347","DOI":"10.1109\/APSIPAASC47483.2019.9023347"},{"key":"2562_CR64","doi-asserted-by":"publisher","unstructured":"K. Yuan, B. Ying, S. Vlaski, A.H. Sayed, Stochastic gradient descent with finite samples sizes, in IEEE 26th International Workshop on Machine Learning for Signal Processing (MLSP) (Vietri sul Mare, 2016), pp. 1\u20136. https:\/\/doi.org\/10.1109\/MLSP.2016.7738878","DOI":"10.1109\/MLSP.2016.7738878"},{"key":"2562_CR65","doi-asserted-by":"crossref","unstructured":"Z. Zhang, F. Ringeval, J. Han, J. Deng, E. Marchi, B. Schuller, Facing realism in spontaneous emotion recognition from speech: feature enhancement by autoencoder with LSTM neural networks, in Proceedings of the Interspeech (2016), pp. 3593\u20133597. 
https:\/\/hal.archives-ouvertes.fr\/hal-01494003","DOI":"10.21437\/Interspeech.2016-998"},{"key":"2562_CR66","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.neucom.2018.09.038","volume":"323","author":"Q Zhang","year":"2019","unstructured":"Q. Zhang, M. Zhang, T. Chen, Z. Sun, Y. Ma, B. Yu, Recent advances in convolutional neural network acceleration. Neurocomputing 323, 37\u201351 (2019). https:\/\/doi.org\/10.1016\/j.neucom.2018.09.038","journal-title":"Neurocomputing"},{"issue":"8","key":"2562_CR67","doi-asserted-by":"publisher","first-page":"1127","DOI":"10.1002\/spe.2487","volume":"47","author":"W Zhang","year":"2017","unstructured":"W. Zhang, D. Zhao, Z. Chai, L.T. Yang, X. Liu, F. Gong, S. Yang, Deep learning and SVM-based emotion recognition from Chinese speech for smart affective services. Softw. Pract. Exp. 47(8), 1127\u20131138 (2017). https:\/\/doi.org\/10.1002\/spe.2487","journal-title":"Softw. Pract. Exp."},{"key":"2562_CR68","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"J. Zhao, X. Mao, L. Chen, Speech emotion recognition using deep 1D & 2D CNN LSTM networks. Biomed. Signal Process. Control 47, 312\u2013323 (2019). https:\/\/doi.org\/10.1016\/j.bspc.2018.08.035","journal-title":"Biomed. Signal Process. Control"},{"key":"2562_CR69","doi-asserted-by":"publisher","unstructured":"W.Q. Zheng, J.S. Yu, Y.X. Zou, An experimental study of speech emotion recognition based on deep convolutional neural networks, in 2015 International Conference on Affective Computing and Intelligent Interaction (ACII) (IEEE, 2015), pp. 827\u2013831. https:\/\/doi.org\/10.1109\/ACII.2015.7344669","DOI":"10.1109\/ACII.2015.7344669"},{"key":"2562_CR70","doi-asserted-by":"publisher","unstructured":"Y. Zhou, Y. Sun, J. Zhang, Y. 
Yan, Speech emotion recognition using both spectral and prosodic features, in International Conference on Information Engineering and Computer Science (IEEE, 2009) pp. 1\u20134. https:\/\/doi.org\/10.1109\/ICIECS.2009.5362730","DOI":"10.1109\/ICIECS.2009.5362730"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02562-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-023-02562-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02562-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,27]],"date-time":"2024-02-27T12:13:38Z","timestamp":1709036018000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-023-02562-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,16]]},"references-count":70,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["2562"],"URL":"https:\/\/doi.org\/10.1007\/s00034-023-02562-5","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12,16]]},"assertion":[{"value":"22 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 November 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 November 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 
December 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Both authors do not have any conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}