{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T00:03:40Z","timestamp":1773273820381,"version":"3.50.1"},"reference-count":70,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,8,25]],"date-time":"2022-08-25T00:00:00Z","timestamp":1661385600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,25]],"date-time":"2022-08-25T00:00:00Z","timestamp":1661385600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s00034-022-02130-3","type":"journal-article","created":{"date-parts":[[2022,8,25]],"date-time":"2022-08-25T13:03:00Z","timestamp":1661432580000},"page":"449-492","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":41,"title":["Deep Convolutional Neural Network and Gray Wolf Optimization Algorithm for Speech Emotion Recognition"],"prefix":"10.1007","volume":"42","author":[{"given":"Mohammad Reza","family":"Falahzadeh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6045-5424","authenticated-orcid":false,"given":"Fardad","family":"Farokhi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ali","family":"Harimi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Reza","family":"Sabbaghi-Nadooshan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,8,25]]},"reference":[{"issue":"4","key":"2130_CR1","doi-asserted-by":"publisher","first-page":"1249","DOI":"10.3390\/s21041249","volume":"21","author":"BJ Abbaschian","year":"2021","unstructured":"B.J. Abbaschian, D. Sierra-Sosa, A. Elmaghraby, Deep learning techniques for speech emotion recognition, from databases to models. Sensors 21(4), 1249 (2021). https:\/\/doi.org\/10.3390\/s21041249","journal-title":"Sensors"},{"key":"2130_CR2","doi-asserted-by":"publisher","DOI":"10.2991\/978-94-6239-061-4","volume-title":"Normal distribution in Normal and student st distributions and their applications","author":"M Ahsanullah","year":"2014","unstructured":"M. Ahsanullah, B.G. Kibria, M. Shakil, Normal distribution in Normal and student st distributions and their applications (Springer, 2014)"},{"issue":"1","key":"2130_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.13140\/RG.2.2.28948.04489","volume":"1","author":"PJM Ali","year":"2014","unstructured":"P.J.M. Ali, R.H. Faraj, E. Koya, P.J.M. Ali, R.H. Faraj, Data normalization and standardization: a technical report. Mach. Learn. Tech. Rep. 1(1), 1\u20136 (2014). https:\/\/doi.org\/10.13140\/RG.2.2.28948.04489","journal-title":"Mach. Learn. Tech. Rep."},{"issue":"4","key":"2130_CR4","doi-asserted-by":"publisher","first-page":"8197","DOI":"10.1016\/j.eswa.2008.10.005","volume":"36","author":"H Altun","year":"2009","unstructured":"H. Altun, G. Polat, Boosting selection of speech related features to improve performance of multi-class SVMs in emotion detection. Expert Syst. Appl. 36(4), 8197\u20138203 (2009). https:\/\/doi.org\/10.1016\/j.eswa.2008.10.005","journal-title":"Expert Syst. Appl."},{"issue":"2","key":"2130_CR5","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s10462-012-9368-5","volume":"43","author":"C-N Anagnostopoulos","year":"2015","unstructured":"C.-N. Anagnostopoulos, T. Iliou, I. Giannoukos, Features and classifiers for emotion recognition from speech: a survey from 2000 to 2011. Artif. Intell. Rev. 43(2), 155\u2013177 (2015). https:\/\/doi.org\/10.1007\/s10462-012-9368-5","journal-title":"Artif. Intell. Rev."},{"issue":"3","key":"2130_CR6","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M El Ayadi","year":"2011","unstructured":"M. El Ayadi, M.S. Kamel, F. Karray, Survey on speech emotion recognition: Features, classification schemes, and databases. Pattern Recogn. 44(3), 572\u2013587 (2011). https:\/\/doi.org\/10.1016\/j.patcog.2010.09.020","journal-title":"Pattern Recogn."},{"issue":"47","key":"2130_CR7","doi-asserted-by":"publisher","first-page":"35739","DOI":"10.1007\/s11042-020-09591-1","volume":"79","author":"A Bakhshi","year":"2020","unstructured":"A. Bakhshi, S. Chalup, A. Harimi, S.M. Mirhassani, Recognition of emotion from speech using evolutionary cepstral coefficients. Multim. Tools Appl. 79(47), 35739\u201335759 (2020). https:\/\/doi.org\/10.1007\/s11042-020-09591-1","journal-title":"Multim. Tools Appl."},{"key":"2130_CR8","doi-asserted-by":"publisher","first-page":"104886","DOI":"10.1016\/j.knosys.2019.104886","volume":"184","author":"A Bhavan","year":"2019","unstructured":"A. Bhavan, P. Chauhan, R.R. Shah, Bagged support vector machines for emotion recognition from speech. Knowl.-Based Syst. 184, 104886 (2019). https:\/\/doi.org\/10.1016\/j.knosys.2019.104886","journal-title":"Knowl.-Based Syst."},{"issue":"9\u201310","key":"2130_CR9","doi-asserted-by":"publisher","first-page":"1186","DOI":"10.1016\/j.specom.2011.04.003","volume":"53","author":"E Bozkurt","year":"2011","unstructured":"E. Bozkurt, E. Erzin, C.E. Erdem, A.T. Erdem, Formant position based weighted spectral features for emotion recognition. Speech Commun. 53(9\u201310), 1186\u20131197 (2011). https:\/\/doi.org\/10.1016\/j.specom.2011.04.003","journal-title":"Speech Commun."},{"key":"2130_CR10","doi-asserted-by":"publisher","unstructured":"F. Burkhardt, A. Paeschke, M. Rolfes, W. F. Sendlmeier, and B. Weiss, \"A database of German emotional speech,\" in Ninth European conference on speech communication and technology, 2005. doi:https:\/\/doi.org\/10.21437\/Interspeech.2005-446","DOI":"10.21437\/Interspeech.2005-446"},{"issue":"20","key":"2130_CR11","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1007\/978-3-642-21402-8_35","volume":"1","author":"Y Chavhan","year":"2010","unstructured":"Y. Chavhan, M. Dhore, P. Yesaware, Speech emotion recognition using support vector machine. Int. J. Computer Appl. 1(20), 6\u20139 (2010). https:\/\/doi.org\/10.1007\/978-3-642-21402-8_35","journal-title":"Int. J. Computer Appl."},{"key":"2130_CR12","unstructured":"F. Chollet, Deep learning with Python. Manning New York, 2018."},{"key":"2130_CR13","doi-asserted-by":"publisher","unstructured":"F. Dellaert, T. Polzin, and A. Waibel, \"Recognizing emotion in speech,\" in Proceeding of Fourth International Conference on Spoken Language Processing. ICSLP'96, 1996, vol. 3: IEEE, pp. 1970\u20131973. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICSLP.1996.608022","DOI":"10.1109\/ICSLP.1996.608022"},{"issue":"8","key":"2130_CR14","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/s00521-016-2712-y","volume":"29","author":"S Demircan","year":"2018","unstructured":"S. Demircan, H. Kahramanli, Application of fuzzy C-means clustering algorithm to spectral features for emotion classification from speech. Neural Comput. Appl. 29(8), 59\u201366 (2018). https:\/\/doi.org\/10.1007\/s00521-016-2712-y","journal-title":"Neural Comput. Appl."},{"issue":"1","key":"2130_CR15","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1109\/TASLP.2017.2759338","volume":"26","author":"J Deng","year":"2017","unstructured":"J. Deng, X. Xu, Z. Zhang, S. Fr\u00fchholz, B. Schuller, Semisupervised autoencoders for speech emotion recognition. IEEE\/ACM Trans. Audio, Speech, Language Process. 26(1), 31\u201343 (2017). https:\/\/doi.org\/10.1109\/TASLP.2017.2759338","journal-title":"IEEE\/ACM Trans. Audio, Speech, Language Process."},{"key":"2130_CR16","volume-title":"Real-time speech and music classification by large audio feature space extraction","author":"F Eyben","year":"2015","unstructured":"F. Eyben, Real-time speech and music classification by large audio feature space extraction (Springer, 2015)"},{"issue":"2","key":"2130_CR17","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","volume":"7","author":"F Eyben","year":"2015","unstructured":"F. Eyben et al., The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE Trans. Affect. Comput. 7(2), 190\u2013202 (2015). https:\/\/doi.org\/10.1109\/TAFFC.2015.2457417","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"2","key":"2130_CR18","first-page":"259","volume":"9","author":"M Fallahzadeh","year":"2021","unstructured":"M. Fallahzadeh, F. Farokhi, A. Harimi, R. Sabbaghi-Nadooshan, Facial expression recognition based on image gradient and deep convolutional neural network. J. AI Data Mining 9(2), 259\u2013268 (2021)","journal-title":"J. AI Data Mining"},{"issue":"2","key":"2130_CR19","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1007\/s00521-017-3272-5","volume":"30","author":"H Faris","year":"2018","unstructured":"H. Faris, I. Aljarah, M.A. Al-Betar, S. Mirjalili, Grey wolf optimizer: a review of recent variants and applications. Neural Comput. Appl. 30(2), 413\u2013435 (2018). https:\/\/doi.org\/10.1007\/s00521-017-3272-5","journal-title":"Neural Comput. Appl."},{"key":"2130_CR20","doi-asserted-by":"publisher","unstructured":"M. Giollo, D. Gunceler, Y. Liu, and D. Willett, \"Bootstrap an end-to-end ASR system by multilingual training, transfer learning, text-to-text mapping and synthetic audio,\" arXiv preprint arXiv:2011.12696, 2020. doi: https:\/\/doi.org\/10.48550\/arXiv.2011.12696","DOI":"10.48550\/arXiv.2011.12696"},{"issue":"5","key":"2130_CR21","doi-asserted-by":"publisher","first-page":"479","DOI":"10.3390\/e21050479","volume":"21","author":"N Hajarolasvadi","year":"2019","unstructured":"N. Hajarolasvadi, H. Demirel, 3D CNN-based speech emotion recognition using k-means clustering and spectrograms. Entropy 21(5), 479 (2019). https:\/\/doi.org\/10.3390\/e21050479","journal-title":"Entropy"},{"key":"2130_CR22","doi-asserted-by":"crossref","unstructured":"K. Han, D. Yu, and I. Tashev, \"Speech emotion recognition using deep neural network and extreme learning machine,\" in Fifteenth annual Conference of the international speech communication association, 2014.","DOI":"10.21437\/Interspeech.2014-57"},{"issue":"7","key":"2130_CR23","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1080\/08839514.2015.1051891","volume":"29","author":"A Harimi","year":"2015","unstructured":"A. Harimi, A. AhmadyFard, A. Shahzadi, K. Yaghmaie, Anger or joy? Emotion recognition using nonlinear dynamics of speech. Appl. Artif. Intell. 29(7), 675\u2013696 (2015). https:\/\/doi.org\/10.1080\/08839514.2015.1051891","journal-title":"Appl. Artif. Intell."},{"issue":"4","key":"2130_CR24","doi-asserted-by":"publisher","first-page":"262","DOI":"10.22452\/mjcs.vol29no4.2","volume":"29","author":"A Harimi","year":"2016","unstructured":"A. Harimi, H.S. Fakhr, A. Bakhshi, Recognition of emotion using reconstructed phase space of speech. Malays. J. Comput. Sci. 29(4), 262\u2013271 (2016). https:\/\/doi.org\/10.22452\/mjcs.vol29no4.2","journal-title":"Malays. J. Comput. Sci."},{"key":"2130_CR25","doi-asserted-by":"publisher","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun, \"Deep residual learning for image recognition,\" in Proceedings of the IEEE Conference on computer vision and pattern recognition, 2016, pp. 770\u2013778. doi: https:\/\/doi.org\/10.48550\/arXiv.1512.03385","DOI":"10.48550\/arXiv.1512.03385"},{"key":"2130_CR26","doi-asserted-by":"publisher","unstructured":"Z. Huang, M. Dong, Q. Mao, and Y. Zhan, \"Speech emotion recognition using CNN,\" in Proceedings of the 22nd ACM International Conference on Multimedia, 2014: ACM, pp. 801\u2013804. DOI:https:\/\/doi.org\/10.37200\/IJPR\/V24I8\/PR280260","DOI":"10.37200\/IJPR\/V24I8\/PR280260"},{"key":"2130_CR27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-05318-5","volume-title":"Automated machine learning: methods, systems, challenges","author":"F Hutter","year":"2019","unstructured":"F. Hutter, L. Kotthoff, J. Vanschoren, Automated machine learning: methods, systems, challenges (Springer Nature, 2019)"},{"issue":"7","key":"2130_CR28","doi-asserted-by":"publisher","first-page":"760","DOI":"10.1016\/j.specom.2004.12.002","volume":"48","author":"KM Indrebo","year":"2006","unstructured":"K.M. Indrebo, R.J. Povinelli, M.T. Johnson, Sub-banded reconstructed phase spaces for speech recognition. Speech Commun. 48(7), 760\u2013774 (2006). https:\/\/doi.org\/10.1016\/j.specom.2004.12.002","journal-title":"Speech Commun."},{"key":"2130_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"D. Issa, M.F. Demirci, A. Yazici, Speech emotion recognition with deep convolutional neural networks. Biomed. Signal Process. Control 59, 101894 (2020). https:\/\/doi.org\/10.1016\/j.bspc.2020.101894","journal-title":"Biomed. Signal Process. Control"},{"key":"2130_CR30","doi-asserted-by":"publisher","unstructured":"M. T. Johnson, A. C. Lindgren, R. J. Povinelli, and X. Yuan, \"Performance of nonlinear speech enhancement using phase space reconstruction,\" in 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings.(ICASSP'03). 2003, vol. 1: IEEE, pp. I-I. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICASSP.2003.1198932","DOI":"10.1109\/ICASSP.2003.1198932"},{"key":"2130_CR31","doi-asserted-by":"publisher","unstructured":"V. Joshi, R. Zhao, R. R. Mehta, K. Kumar, and J. Li, \"Transfer learning approaches for streaming end-to-end speech recognition system,\" arXiv preprint arXiv:2008.05086, 2020. doi: https:\/\/doi.org\/10.48550\/arXiv.2008.05086","DOI":"10.48550\/arXiv.2008.05086"},{"issue":"6","key":"2130_CR32","doi-asserted-by":"publisher","first-page":"3403","DOI":"10.1103\/PhysRevA.45.3403","volume":"45","author":"MB Kennel","year":"1992","unstructured":"M.B. Kennel, R. Brown, H.D. Abarbanel, Determining embedding dimension for phase-space reconstruction using a geometrical construction. Phys. Rev. A 45(6), 3403 (1992). https:\/\/doi.org\/10.1103\/PhysRevA.45.3403","journal-title":"Phys. Rev. A"},{"key":"2130_CR33","doi-asserted-by":"publisher","first-page":"117327","DOI":"10.1109\/ACCESS.2019.2936124","volume":"7","author":"RA Khalil","year":"2019","unstructured":"R.A. Khalil, E. Jones, M.I. Babar, T. Jan, M.H. Zafar, T. Alhussain, Speech emotion recognition using deep learning techniques: a review. IEEE Access 7, 117327\u2013117345 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2936124","journal-title":"IEEE Access"},{"issue":"3","key":"2130_CR34","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1109\/TMECH.2008.2008644","volume":"14","author":"EH Kim","year":"2009","unstructured":"E.H. Kim, K.H. Hyun, S.H. Kim, Y.K. Kwak, Improved emotion recognition with a novel speaker-independent feature. IEEE\/ASME Trans. Mechatron. 14(3), 317\u2013325 (2009). https:\/\/doi.org\/10.1109\/TMECH.2008.2008644","journal-title":"IEEE\/ASME Trans. Mechatron."},{"key":"2130_CR35","doi-asserted-by":"publisher","unstructured":"Y. Kim, H. Lee, and E. M. Provost, \"Deep learning for robust feature generation in audiovisual emotion recognition,\" in 2013 IEEE International Conference on acoustics, speech and signal processing, 2013: IEEE, pp. 3687\u20133691. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICASSP.2013.6638346","DOI":"10.1109\/ICASSP.2013.6638346"},{"key":"2130_CR36","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/j.neucom.2011.12.021","volume":"84","author":"J Krajewski","year":"2012","unstructured":"J. Krajewski, S. Schnieder, D. Sommer, A. Batliner, B. Schuller, Applying multiple classifiers and non-linear dynamics features for detecting sleepiness from speech. Neurocomputing 84, 65\u201375 (2012). https:\/\/doi.org\/10.1016\/j.neucom.2011.12.021","journal-title":"Neurocomputing"},{"key":"2130_CR37","doi-asserted-by":"publisher","first-page":"1097","DOI":"10.1145\/3065386","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"A. Krizhevsky, I. Sutskever, G.E. Hinton, Imagenet classification with deep convolutional neural networks. Adv. Neural. Inf. Process. Syst. 25, 1097\u20131105 (2012). https:\/\/doi.org\/10.1145\/3065386","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"10","key":"2130_CR38","doi-asserted-by":"publisher","first-page":"1163","DOI":"10.3390\/electronics10101163","volume":"10","author":"E Lieskovsk\u00e1","year":"2021","unstructured":"E. Lieskovsk\u00e1, M. Jakubec, R. Jarina, M. Chmul\u00edk, A Review on Speech Emotion Recognition Using Deep Learning and Attention Mechanism. Electronics 10(10), 1163 (2021). https:\/\/doi.org\/10.3390\/electronics10101163","journal-title":"Electronics"},{"issue":"6","key":"2130_CR39","doi-asserted-by":"publisher","first-page":"490","DOI":"10.1109\/TMM.2010.2051872","volume":"12","author":"I Luengo","year":"2010","unstructured":"I. Luengo, E. Navas, I. Hern\u00e1ez, Feature analysis and evaluation for automatic emotion identification in speech. IEEE Trans. Multim. 12(6), 490\u2013501 (2010). https:\/\/doi.org\/10.1109\/TMM.2010.2051872","journal-title":"IEEE Trans. Multim."},{"issue":"1","key":"2130_CR40","doi-asserted-by":"publisher","first-page":"111","DOI":"10.3390\/e23020221","volume":"1","author":"H-G Ma","year":"2006","unstructured":"H.-G. Ma, C.-Z. Han, Selection of embedding dimension and delay time in phase space reconstruction. Front. Electr. Electron. Eng. China 1(1), 111\u2013114 (2006). https:\/\/doi.org\/10.3390\/e23020221","journal-title":"Front. Electr. Electron. Eng. China"},{"key":"2130_CR41","doi-asserted-by":"publisher","unstructured":"O. Martin, I. Kotsia, B. Macq, and I. Pitas, \"The eNTERFACE'05 audio-visual emotion database,\" in 22nd International Conference on Data Engineering Workshops (ICDEW'06), 2006: IEEE, pp. 8\u20138. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICDEW.2006.145","DOI":"10.1109\/ICDEW.2006.145"},{"key":"2130_CR42","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1016\/j.advengsoft.2013.12.007","volume":"69","author":"S Mirjalili","year":"2014","unstructured":"S. Mirjalili, S.M. Mirjalili, A. Lewis, Grey wolf optimizer. Adv. Eng. Softw. 69, 46\u201361 (2014). https:\/\/doi.org\/10.1016\/j.advengsoft.2013.12.007","journal-title":"Adv. Eng. Softw."},{"issue":"2","key":"2130_CR43","doi-asserted-by":"publisher","first-page":"1265","DOI":"10.1007\/s00366-019-00882-2","volume":"37","author":"H Moayedi","year":"2021","unstructured":"H. Moayedi, H. Nguyen, L. Kok Foong, Nonlinear evolutionary swarm intelligence of grasshopper optimization algorithm and gray wolf optimization for weight adjustment of neural network. Eng. Computers 37(2), 1265\u20131275 (2021). https:\/\/doi.org\/10.1007\/s00366-019-00882-2","journal-title":"Eng. Computers"},{"issue":"4","key":"2130_CR44","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1007\/s005210070006","volume":"9","author":"J Nicholson","year":"2000","unstructured":"J. Nicholson, K. Takahashi, R. Nakatsu, Emotion recognition in speech using neural networks. Neural Comput. Appl. 9(4), 290\u2013296 (2000). https:\/\/doi.org\/10.1007\/s005210070006","journal-title":"Neural Comput. Appl."},{"key":"2130_CR45","doi-asserted-by":"publisher","unstructured":"Y. Niu, D. Zou, Y. Niu, Z. He, and H. Tan, \"A breakthrough in speech emotion recognition using deep retinal convolution neural networks,\" arXiv preprint arXiv:1707.09917, 2017. doi: https:\/\/doi.org\/10.48550\/arXiv.1707.09917","DOI":"10.48550\/arXiv.1707.09917"},{"key":"2130_CR46","doi-asserted-by":"publisher","unstructured":"T.-L. Pao, C. S. Chien, Y.-T. Chen, J.-H. Yeh, Y.-M. Cheng, and W.-Y. Liao, \"Combination of multiple classifiers for improving emotion recognition in Mandarin speech,\" in Third International Conference on Intelligent Information Hiding and Multimedia Signal Processing (IIH-MSP 2007), 2007, vol. 1: IEEE, pp. 35\u201338. DOI:\u00a0https:\/\/doi.org\/10.1109\/IIHMSP.2007.4457487","DOI":"10.1109\/IIHMSP.2007.4457487"},{"key":"2130_CR47","volume-title":"Signals, systems, and transforms","author":"CL Phillips","year":"2003","unstructured":"C.L. Phillips, J.M. Parr, E.A. Riskin, T. Prabhakar, Signals, systems, and transforms (Prentice Hall, 2003)"},{"key":"2130_CR48","unstructured":"P. Prajith, \"Investigations on the applications of dynamical instabilities and deterministic chaos for speech signal processing,\" 2008."},{"key":"2130_CR49","doi-asserted-by":"publisher","unstructured":"B. Schuller, G. Rigoll, and M. Lang, \"Hidden Markov model-based speech emotion recognition,\" in 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings.(ICASSP'03). 2003, vol. 2: IEEE, pp. II-1. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICME.2003.1220939","DOI":"10.1109\/ICME.2003.1220939"},{"key":"2130_CR50","doi-asserted-by":"publisher","unstructured":"B. Schuller, G. Rigoll, and M. Lang, \"Speech emotion recognition combining acoustic features and linguistic information in a hybrid support vector machine-belief network architecture,\" in 2004 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2004, vol. 1: IEEE, pp. I-577. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICASSP.2004.1326051","DOI":"10.1109\/ICASSP.2004.1326051"},{"key":"2130_CR51","doi-asserted-by":"publisher","unstructured":"B. Schuller, B. Vlasenko, F. Eyben, G. Rigoll, and A. Wendemuth, \"Acoustic emotion recognition: A benchmark comparison of performances,\" in 2009 IEEE Workshop on Automatic Speech Recognition and Understanding, 2009: IEEE, pp. 552\u2013557. DOI:https:\/\/doi.org\/10.1109\/ASRU.2009.5372886","DOI":"10.1109\/ASRU.2009.5372886"},{"key":"2130_CR52","doi-asserted-by":"publisher","first-page":"871","DOI":"10.3906\/elk-1302-90","volume":"23","author":"A Shahzadi","year":"2015","unstructured":"A. Shahzadi, A. Ahmadyfard, A. Harimi, K. Yaghmaie, Speech emotion recognition using nonlinear dynamics features. Turkish J. Electr. Eng. Computer Sci. 23, 871 (2015). https:\/\/doi.org\/10.3906\/elk-1302-90","journal-title":"Turkish J. Electr. Eng. Computer Sci."},{"issue":"1","key":"2130_CR53","doi-asserted-by":"publisher","first-page":"100","DOI":"10.4218\/etrij.13.0112.0074","volume":"35","author":"Y Shekofteh","year":"2013","unstructured":"Y. Shekofteh, F. Almasganj, Feature extraction based on speech attractors in the reconstructed phase space for automatic speech recognition systems. ETRI J. 35(1), 100\u2013108 (2013). https:\/\/doi.org\/10.4218\/etrij.13.0112.0074","journal-title":"ETRI J."},{"key":"2130_CR54","doi-asserted-by":"publisher","unstructured":"K. Simonyan and A. Zisserman, \"Very deep convolutional networks for large-scale image recognition,\" arXiv preprint arXiv:1409.1556, 2014. doi: https:\/\/doi.org\/10.48550\/arXiv.1409.1556","DOI":"10.48550\/arXiv.1409.1556"},{"key":"2130_CR55","doi-asserted-by":"publisher","unstructured":"A. Stuhlsatz, C. Meyer, F. Eyben, T. Zielke, G. Meier, and B. Schuller, \"Deep neural networks for acoustic emotion recognition: raising the benchmarks,\" in 2011 IEEE International Conference on acoustics, speech and signal processing (ICASSP), 2011: IEEE, pp. 5688\u20135691. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICASSP.2011.5947651","DOI":"10.1109\/ICASSP.2011.5947651"},{"key":"2130_CR56","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1155\/2020\/9452976","volume":"871","author":"Y Sun","year":"2020","unstructured":"Y. Sun, X.-Y. Zhang, J.-H. Ma, C.-X. Song, H.-F. Lv, Nonlinear dynamic feature extraction based on phase space reconstruction for the classification of speech and emotion. Mathem. Probl. Eng. 871, 45 (2020). https:\/\/doi.org\/10.1155\/2020\/9452976","journal-title":"Mathem. Probl. Eng."},{"key":"2130_CR57","doi-asserted-by":"publisher","unstructured":"C. Szegedy et al., \"Going deeper with convolutions,\" in Proceedings of the IEEE Conference on computer vision and pattern recognition, 2015, pp. 1\u20139. doi: https:\/\/doi.org\/10.48550\/arXiv.1409.4842","DOI":"10.48550\/arXiv.1409.4842"},{"key":"2130_CR58","doi-asserted-by":"publisher","unstructured":"G. Trigeorgis et al., \"Adieu features? end-to-end speech emotion recognition using a deep convolutional recurrent network,\" in 2016 IEEE International Conference on acoustics, speech and signal processing (ICASSP), 2016: IEEE, pp. 5200\u20135204. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICASSP.2016.7472669","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"2130_CR59","doi-asserted-by":"publisher","first-page":"106547","DOI":"10.1016\/j.knosys.2020.106547","volume":"211","author":"T Tuncer","year":"2021","unstructured":"T. Tuncer, S. Dogan, U.R. Acharya, Automated accurate speech emotion recognition system using twine shuffle pattern and iterative neighborhood component analysis techniques. Knowl.-Based Syst. 211, 106547 (2021). https:\/\/doi.org\/10.1016\/j.knosys.2020.106547","journal-title":"Knowl.-Based Syst."},{"key":"2130_CR60","doi-asserted-by":"publisher","unstructured":"D. Ververidis and C. Kotropoulos, \"Emotional speech classification using Gaussian mixture models and the sequential floating forward selection algorithm,\" in 2005 IEEE International Conference on Multimedia and Expo, 2005: IEEE, pp. 1500\u20131503. DOI:\u00a0https:\/\/doi.org\/10.1109\/ICME.2005.1521717","DOI":"10.1109\/ICME.2005.1521717"},{"key":"2130_CR61","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1016\/j.swevo.2019.06.002","volume":"49","author":"Y Wang","year":"2019","unstructured":"Y. Wang, H. Zhang, G. Zhang, cPSO-CNN: An efficient PSO-based algorithm for fine-tuning hyper-parameters of convolutional neural networks. Swarm Evol. Comput. 49, 114\u2013123 (2019). https:\/\/doi.org\/10.1016\/j.swevo.2019.06.002","journal-title":"Swarm Evol. Comput."},{"issue":"5","key":"2130_CR62","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1016\/j.specom.2010.08.013","volume":"53","author":"S Wu","year":"2011","unstructured":"S. Wu, T.H. Falk, W.-Y. Chan, Automatic speech emotion recognition using modulation spectral features. Speech Commun. 53(5), 768\u2013785 (2011). https:\/\/doi.org\/10.1016\/j.specom.2010.08.013","journal-title":"Speech Commun."},{"issue":"11","key":"2130_CR63","doi-asserted-by":"publisher","first-page":"1675","DOI":"10.1109\/TASLP.2019.2925934","volume":"27","author":"Y Xie","year":"2019","unstructured":"Y. Xie, R. Liang, Z. Liang, C. Huang, C. Zou, B. Schuller, Speech emotion classification using attention-based LSTM. IEEE\/ACM Trans. Audio, Speech, Language Process. 27(11), 1675\u20131685 (2019). https:\/\/doi.org\/10.1109\/TASLP.2019.2925934","journal-title":"IEEE\/ACM Trans. Audio, Speech, Language Process."},{"issue":"7","key":"2130_CR64","doi-asserted-by":"publisher","first-page":"1436","DOI":"10.1109\/TASLP.2017.2694704","volume":"25","author":"X Xu","year":"2017","unstructured":"X. Xu et al., A two-dimensional framework of multiple kernel subspace learning for recognizing emotion in speech. IEEE\/ACM Trans. Audio, Speech, Language Process. 25(7), 1436\u20131449 (2017). https:\/\/doi.org\/10.1109\/TASLP.2017.2694704","journal-title":"IEEE\/ACM Trans. Audio, Speech, Language Process."},{"issue":"5","key":"2130_CR65","doi-asserted-by":"publisher","first-page":"1415","DOI":"10.1016\/j.sigpro.2009.09.009","volume":"90","author":"B Yang","year":"2010","unstructured":"B. Yang, M. Lugger, Emotion recognition from speech signals using new harmony features. Signal Process. 90(5), 1415\u20131423 (2010). https:\/\/doi.org\/10.1016\/j.sigpro.2009.09.009","journal-title":"Signal Process."},{"issue":"3","key":"2130_CR66","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/TAFFC.2016.2553038","volume":"8","author":"S Zhalehpour","year":"2016","unstructured":"S. Zhalehpour, O. Onder, Z. Akhtar, C.E. Erdem, BAUM-1: A spontaneous audio-visual face database of affective and mental states. IEEE Trans. Affect. Comput. 8(3), 300\u2013313 (2016). https:\/\/doi.org\/10.1109\/TAFFC.2016.2553038","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"6","key":"2130_CR67","doi-asserted-by":"publisher","first-page":"1576","DOI":"10.1109\/TMM.2017.2766843","volume":"20","author":"S Zhang","year":"2017","unstructured":"S. Zhang, S. Zhang, T. Huang, W. Gao, Speech emotion recognition using deep convolutional neural network and discriminant temporal pyramid matching. IEEE Trans. Multim. 20(6), 1576\u20131590 (2017). https:\/\/doi.org\/10.1109\/TMM.2017.2766843","journal-title":"IEEE Trans. Multim."},{"issue":"1","key":"2130_CR68","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1109\/TASLP.2014.2375558","volume":"23","author":"Z Zhang","year":"2015","unstructured":"Z. Zhang, E. Coutinho, J. Deng, B. Schuller, \u201cCooperative learning and its application to emotion recognition from speech,.\u201d IEEE\/ACM Trans. Audio Speech Language Process. (TASLP) 23(1), 115\u2013126 (2015). https:\/\/doi.org\/10.1109\/TASLP.2014.2375558","journal-title":"IEEE\/ACM Trans. Audio Speech Language Process. (TASLP)"},{"issue":"6","key":"2130_CR69","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1049\/iet-spr.2017.0320","volume":"12","author":"J Zhao","year":"2018","unstructured":"J. Zhao, X. Mao, L. Chen, Learning deep features to recognise speech emotion using merged deep CNN. IET Signal Proc. 12(6), 713\u2013721 (2018). https:\/\/doi.org\/10.1049\/iet-spr.2017.0320","journal-title":"IET Signal Proc."},{"key":"2130_CR70","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"J. Zhao, X. Mao, L. Chen, Speech emotion recognition using deep 1D and 2D CNN LSTM networks. Biomed. Signal Process. Control 47, 312\u2013323 (2019). https:\/\/doi.org\/10.1016\/j.bspc.2018.08.035","journal-title":"Biomed. Signal Process. Control"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-02130-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-022-02130-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-02130-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,4]],"date-time":"2023-01-04T05:20:50Z","timestamp":1672809650000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-022-02130-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,25]]},"references-count":70,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["2130"],"URL":"https:\/\/doi.org\/10.1007\/s00034-022-02130-3","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,25]]},"assertion":[{"value":"13 July 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 July 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 July 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 August 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}