{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T19:51:01Z","timestamp":1770493861966,"version":"3.49.0"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s00034-023-02315-4","type":"journal-article","created":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T09:03:03Z","timestamp":1677661383000},"page":"4271-4291","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A 3D Tensor Representation of Speech and 3D Convolutional Neural Network for Emotion Recognition"],"prefix":"10.1007","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1548-9190","authenticated-orcid":false,"given":"Mohammad Reza","family":"Falahzadeh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fardad","family":"Farokhi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ali","family":"Harimi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Reza","family":"Sabbaghi-Nadooshan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,3,1]]},"reference":[{"key":"2315_CR1","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.specom.2022.02.007","volume":"139","author":"A Bakhshi","year":"2022","unstructured":"A. Bakhshi, A. Harimi, S. Chalup, CyTex: transforming speech to textured images for speech emotion recognition. Speech Commun. 139, 62\u201375 (2022). https:\/\/doi.org\/10.1016\/j.specom.2022.02.007","journal-title":"Speech Commun."},{"key":"2315_CR2","doi-asserted-by":"publisher","first-page":"104886","DOI":"10.1016\/j.knosys.2019.104886","volume":"184","author":"A Bhavan","year":"2019","unstructured":"A. Bhavan, P. Chauhan, R.R. Shah, Bagged support vector machines for emotion recognition from speech. Knowl. Based Syst. 184, 104886 (2019). https:\/\/doi.org\/10.1016\/j.knosys.2019.104886","journal-title":"Knowl. Based Syst."},{"issue":"7\u20138","key":"2315_CR3","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1016\/j.specom.2010.02.010","volume":"52","author":"D Bitouk","year":"2010","unstructured":"D. Bitouk, R. Verma, A. Nenkova, Class-level spectral features for emotion recognition. Speech Commun. 52(7\u20138), 613\u2013625 (2010). https:\/\/doi.org\/10.1016\/j.specom.2010.02.010","journal-title":"Speech Commun."},{"issue":"9\u201310","key":"2315_CR4","doi-asserted-by":"publisher","first-page":"1186","DOI":"10.1016\/j.specom.2011.04.003","volume":"53","author":"E Bozkurt","year":"2011","unstructured":"E. Bozkurt, E. Erzin, C.E. Erdem, A.T. Erdem, Formant position based weighted spectral features for emotion recognition. Speech Commun. 53(9\u201310), 1186\u20131197 (2011). https:\/\/doi.org\/10.1016\/j.specom.2011.04.003","journal-title":"Speech Commun."},{"key":"2315_CR5","doi-asserted-by":"publisher","unstructured":"F. Burkhardt, A. Paeschke, M. Rolfes, W. F. Sendlmeier, B. Weiss, A database of German emotional speech, in Ninth European Conference on Speech Communication and Technology (2005). https:\/\/doi.org\/10.21437\/Interspeech.2005-446","DOI":"10.21437\/Interspeech.2005-446"},{"key":"2315_CR6","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1109\/ACCESS.2016.2641480","volume":"5","author":"M Chen","year":"2016","unstructured":"M. Chen, P. Zhou, G. Fortino, Emotion communication system. IEEE Access 5, 326\u2013337 (2016). https:\/\/doi.org\/10.1109\/ACCESS.2016.2641480","journal-title":"IEEE Access"},{"key":"2315_CR7","volume-title":"Deep Learning with Python","author":"F Chollet","year":"2018","unstructured":"F. Chollet, Deep Learning with Python (Manning, New York, 2018)"},{"issue":"3\u20134","key":"2315_CR8","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1561\/2000000039","volume":"7","author":"L Deng","year":"2014","unstructured":"L. Deng, D. Yu, Deep learning: methods and applications. Found. Trends Signal Process. 7(3\u20134), 197\u2013387 (2014). https:\/\/doi.org\/10.1561\/2000000039","journal-title":"Found. Trends Signal Process."},{"issue":"3","key":"2315_CR9","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M El Ayadi","year":"2011","unstructured":"M. El Ayadi, M.S. Kamel, F. Karray, Survey on speech emotion recognition: features, classification schemes, and databases. Pattern Recogn. 44(3), 572\u2013587 (2011). https:\/\/doi.org\/10.1016\/j.patcog.2010.09.020","journal-title":"Pattern Recogn."},{"key":"2315_CR10","volume-title":"Real-Time Speech and Music Classification by Large Audio Feature Space Extraction","author":"F Eyben","year":"2015","unstructured":"F. Eyben, Real-Time Speech and Music Classification by Large Audio Feature Space Extraction (Springer, Berlin, 2015)"},{"issue":"2","key":"2315_CR11","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","volume":"7","author":"F Eyben","year":"2015","unstructured":"F. Eyben et al., The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE Trans. Affect. Comput. 7(2), 190\u2013202 (2015). https:\/\/doi.org\/10.1109\/TAFFC.2015.2457417","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"1","key":"2315_CR12","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/s00034-022-02130-3","volume":"42","author":"MR Falahzadeh","year":"2023","unstructured":"M.R. Falahzadeh, F. Farokhi, A. Harimi, R. Sabbaghi-Nadooshan, Deep Convolutional neural network and gray wolf optimization algorithm for speech emotion recognition. Circuits Syst. Signal Process. 42(1), 449\u2013492 (2023). https:\/\/doi.org\/10.1007\/s00034-022-02130-3","journal-title":"Circuits Syst. Signal Process."},{"key":"2315_CR13","doi-asserted-by":"publisher","first-page":"112460","DOI":"10.1109\/ACCESS.2022.3217226","volume":"10","author":"MR Falahzadeh","year":"2022","unstructured":"M.R. Falahzadeh, E.Z. Farsa, A. Harimi, A. Ahmadi, A. Abraham, 3D Convolutional Neural network for speech emotion recognition with its realization on intel CPU and NVIDIA GPU. IEEE Access 10, 112460\u2013112471 (2022). https:\/\/doi.org\/10.1109\/ACCESS.2022.3217226","journal-title":"IEEE Access"},{"issue":"2","key":"2315_CR14","doi-asserted-by":"publisher","first-page":"259","DOI":"10.22044\/JADM.2021.9898.2121","volume":"9","author":"M Fallahzadeh","year":"2021","unstructured":"M. Fallahzadeh, F. Farokhi, A. Harimi, R. Sabbaghi-Nadooshan, Facial expression recognition based on image gradient and deep convolutional neural network. J. AI Data Min. 9(2), 259\u2013268 (2021). https:\/\/doi.org\/10.22044\/JADM.2021.9898.2121","journal-title":"J. AI Data Min."},{"key":"2315_CR15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-71","volume-title":"Detecting autism, emotions and social signals using AdaBoost","author":"G Gosztolya","year":"2013","unstructured":"G. Gosztolya, R. Busa-Fekete, L. T\u00f3th, Detecting autism, emotions and social signals using AdaBoost (Interspeech, Brno, 2013)"},{"issue":"5","key":"2315_CR16","doi-asserted-by":"publisher","first-page":"479","DOI":"10.3390\/e21050479","volume":"21","author":"N Hajarolasvadi","year":"2019","unstructured":"N. Hajarolasvadi, H. Demirel, 3D CNN-based speech emotion recognition using k-means clustering and spectrograms. Entropy 21(5), 479 (2019). https:\/\/doi.org\/10.3390\/e21050479","journal-title":"Entropy"},{"issue":"7","key":"2315_CR17","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1080\/08839514.2015.1051891","volume":"29","author":"A Harimi","year":"2015","unstructured":"A. Harimi, A. AhmadyFard, A. Shahzadi, K. Yaghmaie, Anger or joy? Emotion recognition using nonlinear dynamics of speech. Appl. Artif. Intell. 29(7), 675\u2013696 (2015). https:\/\/doi.org\/10.1080\/08839514.2015.1051891","journal-title":"Appl. Artif. Intell."},{"issue":"4","key":"2315_CR18","doi-asserted-by":"publisher","first-page":"262","DOI":"10.22452\/mjcs.vol29no4.2","volume":"29","author":"A Harimi","year":"2016","unstructured":"A. Harimi, H.S. Fakhr, A. Bakhshi, Recognition of emotion using reconstructed phase space of speech. Malays. J. Comput. Sci. 29(4), 262\u2013271 (2016). https:\/\/doi.org\/10.22452\/mjcs.vol29no4.2","journal-title":"Malays. J. Comput. Sci."},{"key":"2315_CR19","doi-asserted-by":"publisher","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun, Deep residual learning for image recognition, in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2016), pp. 770\u2013778. https:\/\/doi.org\/10.48550\/arXiv.1512.03385","DOI":"10.48550\/arXiv.1512.03385"},{"issue":"2","key":"2315_CR20","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1016\/j.bspc.2010.11.001","volume":"6","author":"L He","year":"2011","unstructured":"L. He, M. Lech, N.C. Maddage, N.B. Allen, Study of empirical mode decomposition and spectral analysis for stress and emotion classification in natural speech. Biomed. Signal Process. Control 6(2), 139\u2013146 (2011). https:\/\/doi.org\/10.1016\/j.bspc.2010.11.001","journal-title":"Biomed. Signal Process. Control"},{"key":"2315_CR21","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.inffus.2018.09.008","volume":"49","author":"MS Hossain","year":"2019","unstructured":"M.S. Hossain, G. Muhammad, Emotion recognition using deep learning approach from audio\u2013visual emotional big data. Inf. Fusion 49, 69\u201378 (2019). https:\/\/doi.org\/10.1016\/j.inffus.2018.09.008","journal-title":"Inf. Fusion"},{"key":"2315_CR22","doi-asserted-by":"publisher","unstructured":"Z. Huang, M. Dong, Q. Mao, Y. Zhan, Speech emotion recognition using CNN, in Proceedings of the 22nd ACM International Conference on Multimedia (ACM, 2014), pp. 801\u2013804. https:\/\/doi.org\/10.37200\/IJPR\/V24I8\/PR280260","DOI":"10.37200\/IJPR\/V24I8\/PR280260"},{"key":"2315_CR23","doi-asserted-by":"publisher","first-page":"101894","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"D. Issa, M.F. Demirci, A. Yazici, Speech emotion recognition with deep convolutional neural networks. Biomed. Signal Process. Control 59, 101894 (2020). https:\/\/doi.org\/10.1016\/j.bspc.2020.101894","journal-title":"Biomed. Signal Process. Control"},{"issue":"1","key":"2315_CR24","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2012","unstructured":"S. Ji, W. Xu, M. Yang, K. Yu, 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2012). https:\/\/doi.org\/10.1109\/TPAMI.2012.59","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2315_CR25","doi-asserted-by":"publisher","first-page":"117327","DOI":"10.1109\/ACCESS.2019.2936124","volume":"7","author":"RA Khalil","year":"2019","unstructured":"R.A. Khalil, E. Jones, M.I. Babar, T. Jan, M.H. Zafar, T. Alhussain, Speech emotion recognition using deep learning techniques: a review. IEEE Access 7, 117327\u2013117345 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2936124","journal-title":"IEEE Access"},{"key":"2315_CR26","doi-asserted-by":"publisher","unstructured":"J. Kim, K.P. Truong, G. Englebienne, V. Evers, Learning spectro-temporal features with 3D CNNs for speech emotion recognition, in 2017 Seventh International Conference on Affective Computing and Intelligent Interaction (ACII) (IEEE, 2017), pp. 383\u2013388. https:\/\/doi.org\/10.1109\/ACII.2017.8273628","DOI":"10.1109\/ACII.2017.8273628"},{"key":"2315_CR27","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/j.neucom.2011.12.021","volume":"84","author":"J Krajewski","year":"2012","unstructured":"J. Krajewski, S. Schnieder, D. Sommer, A. Batliner, B. Schuller, Applying multiple classifiers and non-linear dynamics features for detecting sleepiness from speech. Neurocomputing 84, 65\u201375 (2012). https:\/\/doi.org\/10.1016\/j.neucom.2011.12.021","journal-title":"Neurocomputing"},{"key":"2315_CR28","doi-asserted-by":"publisher","first-page":"1097","DOI":"10.1145\/3065386","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"A. Krizhevsky, I. Sutskever, G.E. Hinton, Imagenet classification with deep convolutional neural networks. Adv. Neural. Inf. Process. Syst. 25, 1097\u20131105 (2012). https:\/\/doi.org\/10.1145\/3065386","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2315_CR29","doi-asserted-by":"publisher","unstructured":"N.D. Lane, P. Georgiev, Can deep learning revolutionize mobile sensing?, in Proceedings of the 16th International Workshop on Mobile Computing Systems and Applications (2015), pp. 117\u2013122. https:\/\/doi.org\/10.1145\/2699343.2699349","DOI":"10.1145\/2699343.2699349"},{"issue":"1","key":"2315_CR30","doi-asserted-by":"publisher","first-page":"111","DOI":"10.3390\/e23020221","volume":"1","author":"H-G Ma","year":"2006","unstructured":"H.-G. Ma, C.-Z. Han, Selection of embedding dimension and delay time in phase space reconstruction. Front. Electr. Electron. Eng. China 1(1), 111\u2013114 (2006). https:\/\/doi.org\/10.3390\/e23020221","journal-title":"Front. Electr. Electron. Eng. China"},{"key":"2315_CR31","doi-asserted-by":"publisher","unstructured":"O. Martin, I. Kotsia, B. Macq, I. Pitas, The eNTERFACE'05 audio-visual emotion database, in 22nd International Conference on Data Engineering Workshops (ICDEW'06) (IEEE, 2006), p. 8. https:\/\/doi.org\/10.1109\/ICDEW.2006.145","DOI":"10.1109\/ICDEW.2006.145"},{"key":"2315_CR32","unstructured":"V. Nair, G.E. Hinton, Rectified linear units improve restricted boltzmann machines, in Proceedings of the 27th International Conference on Machine Learning (ICML-10) (2010), pp. 807\u2013814."},{"key":"2315_CR33","doi-asserted-by":"publisher","first-page":"397","DOI":"10.1016\/j.neucom.2022.04.065","volume":"493","author":"S Niyas","year":"2022","unstructured":"S. Niyas, S. Pawan, M.A. Kumar, J. Rajan, Medical image segmentation with 3D convolutional neural networks: a survey. Neurocomputing 493, 397\u2013413 (2022). https:\/\/doi.org\/10.1016\/j.neucom.2022.04.065","journal-title":"Neurocomputing"},{"issue":"6","key":"2315_CR34","doi-asserted-by":"publisher","first-page":"2178","DOI":"10.1109\/TSP.2006.873479","volume":"54","author":"RJ Povinelli","year":"2006","unstructured":"R.J. Povinelli, M.T. Johnson, A.C. Lindgren, F.M. Roberts, J. Ye, Statistical models of reconstructed phase spaces for signal classification. IEEE Trans. Signal Process. 54(6), 2178\u20132186 (2006). https:\/\/doi.org\/10.1109\/TSP.2006.873479","journal-title":"IEEE Trans. Signal Process."},{"key":"2315_CR35","unstructured":"P. Prajith, Investigations on the applications of dynamical instabilities and deterministic chaos for speech signal processing, Ph.D Thesis, (2008). https:\/\/find.uoc.ac.in\/Record\/109095"},{"issue":"3","key":"2315_CR36","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1016\/j.ipm.2008.09.003","volume":"45","author":"J Rong","year":"2009","unstructured":"J. Rong, G. Li, Y.-P.P. Chen, Acoustic feature selection for automatic emotion recognition from speech. Inf. Process. Manag. 45(3), 315\u2013328 (2009). https:\/\/doi.org\/10.1016\/j.ipm.2008.09.003","journal-title":"Inf. Process. Manag."},{"key":"2315_CR37","doi-asserted-by":"publisher","unstructured":"B. Schuller, B. Vlasenko, F. Eyben, G. Rigoll, A. Wendemuth, Acoustic emotion recognition: a benchmark comparison of performances, in 2009 IEEE Workshop on Automatic Speech Recognition & Understanding (IEEE, 2009), pp. 552\u2013557. https:\/\/doi.org\/10.1109\/ASRU.2009.5372886","DOI":"10.1109\/ASRU.2009.5372886"},{"issue":"5","key":"2315_CR38","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1145\/3129340","volume":"61","author":"BW Schuller","year":"2018","unstructured":"B.W. Schuller, Speech emotion recognition: two decades in a nutshell, benchmarks, and ongoing trends. Commun. ACM 61(5), 90\u201399 (2018). https:\/\/doi.org\/10.1145\/3129340","journal-title":"Commun. ACM"},{"issue":"1","key":"2315_CR39","doi-asserted-by":"publisher","first-page":"100","DOI":"10.4218\/etrij.13.0112.0074","volume":"35","author":"Y Shekofteh","year":"2013","unstructured":"Y. Shekofteh, F. Almasganj, Feature extraction based on speech attractors in the reconstructed phase space for automatic speech recognition systems. ETRI J. 35(1), 100\u2013108 (2013). https:\/\/doi.org\/10.4218\/etrij.13.0112.0074","journal-title":"ETRI J."},{"key":"2315_CR40","doi-asserted-by":"publisher","unstructured":"K. Simonyan, A. Zisserman, Very deep convolutional networks for large-scale image recognition (2014). arXiv:1409.1556. https:\/\/doi.org\/10.48550\/arXiv.1409.1556","DOI":"10.48550\/arXiv.1409.1556"},{"issue":"1","key":"2315_CR41","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1049\/el.2014.3339","volume":"51","author":"P Song","year":"2014","unstructured":"P. Song, Y. Jin, C. Zha, L. Zhao, Speech emotion recognition method based on hidden factor analysis. Electron. Lett. 51(1), 112\u2013114 (2014). https:\/\/doi.org\/10.1049\/el.2014.3339","journal-title":"Electron. Lett."},{"key":"2315_CR42","doi-asserted-by":"crossref","unstructured":"A. Stuhlsatz, C. Meyer, F. Eyben, T. Zielke, G. Meier, B. Schuller, Deep neural networks for acoustic emotion recognition: raising the benchmarks, in 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2011), pp. 5688\u20135691","DOI":"10.1109\/ICASSP.2011.5947651"},{"key":"2315_CR43","doi-asserted-by":"publisher","unstructured":"C. Szegedy et al., Going deeper with convolutions, in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2015), pp. 1\u20139. https:\/\/doi.org\/10.1109\/ICASSP.2011.5947651","DOI":"10.1109\/ICASSP.2011.5947651"},{"key":"2315_CR44","doi-asserted-by":"publisher","first-page":"22081","DOI":"10.1109\/ACCESS.2017.2761539","volume":"5","author":"A Torfi","year":"2017","unstructured":"A. Torfi, S.M. Iranmanesh, N. Nasrabadi, J. Dawson, 3d convolutional neural networks for cross audio-visual matching recognition. IEEE Access 5, 22081\u201322091 (2017). https:\/\/doi.org\/10.1109\/ACCESS.2017.2761539","journal-title":"IEEE Access"},{"key":"2315_CR45","doi-asserted-by":"crossref","unstructured":"D. Tran, L. Bourdev, R. Fergus, L. Torresani, M. Paluri, Learning spatiotemporal features with 3d convolutional networks, in Proceedings of the IEEE International Conference on Computer Vision (2015), pp. 4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"2315_CR46","doi-asserted-by":"publisher","first-page":"106547","DOI":"10.1016\/j.knosys.2020.106547","volume":"211","author":"T Tuncer","year":"2021","unstructured":"T. Tuncer, S. Dogan, U.R. Acharya, Automated accurate speech emotion recognition system using twine shuffle pattern and iterative neighborhood component analysis techniques. Knowl. Based Syst. 211, 106547 (2021). https:\/\/doi.org\/10.1016\/j.knosys.2020.106547","journal-title":"Knowl. Based Syst."},{"key":"2315_CR47","doi-asserted-by":"publisher","first-page":"1679","DOI":"10.3389\/fpsyg.2018.01679","volume":"9","author":"S Wallot","year":"2018","unstructured":"S. Wallot, D. M\u00f8nster, Calculation of average mutual information (ami) and false-nearest neighbors (fnn) for the estimation of embedding parameters of multidimensional time series in matlab. Front. Psychol. 9, 1679 (2018). https:\/\/doi.org\/10.3389\/fpsyg.2018.01679","journal-title":"Front. Psychol."},{"issue":"5","key":"2315_CR48","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1016\/j.specom.2010.08.013","volume":"53","author":"S Wu","year":"2011","unstructured":"S. Wu, T.H. Falk, W.-Y. Chan, Automatic speech emotion recognition using modulation spectral features. Speech Commun. 53(5), 768\u2013785 (2011). https:\/\/doi.org\/10.1016\/j.specom.2010.08.013","journal-title":"Speech Commun."},{"issue":"5","key":"2315_CR49","doi-asserted-by":"publisher","first-page":"1415","DOI":"10.1016\/j.sigpro.2009.09.009","volume":"90","author":"B Yang","year":"2010","unstructured":"B. Yang, M. Lugger, Emotion recognition from speech signals using new harmony features. Signal Process. 90(5), 1415\u20131423 (2010). https:\/\/doi.org\/10.1016\/j.sigpro.2009.09.009","journal-title":"Signal Process."},{"issue":"3","key":"2315_CR50","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/TAFFC.2016.2553038","volume":"8","author":"S Zhalehpour","year":"2016","unstructured":"S. Zhalehpour, O. Onder, Z. Akhtar, C.E. Erdem, BAUM-1: a spontaneous audio-visual face database of affective and mental states. IEEE Trans. Affect. Comput. 8(3), 300\u2013313 (2016). https:\/\/doi.org\/10.1109\/TAFFC.2016.2553038","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"6","key":"2315_CR51","doi-asserted-by":"publisher","first-page":"1389","DOI":"10.1109\/JSAC.2019.2904363","volume":"37","author":"C Zhang","year":"2019","unstructured":"C. Zhang, H. Zhang, J. Qiao, D. Yuan, M. Zhang, Deep transfer learning for intelligent cellular traffic prediction based on cross-domain big data. IEEE J. Sel. Areas Commun. 37(6), 1389\u20131401 (2019). https:\/\/doi.org\/10.1109\/JSAC.2019.2904363","journal-title":"IEEE J. Sel. Areas Commun."},{"issue":"6","key":"2315_CR52","doi-asserted-by":"publisher","first-page":"1576","DOI":"10.1109\/TMM.2017.2766843","volume":"20","author":"S Zhang","year":"2017","unstructured":"S. Zhang, S. Zhang, T. Huang, W. Gao, Speech emotion recognition using deep convolutional neural network and discriminant temporal pyramid matching. IEEE Trans. Multimed. 20(6), 1576\u20131590 (2017). https:\/\/doi.org\/10.1109\/TMM.2017.2766843","journal-title":"IEEE Trans. Multimed."},{"issue":"6","key":"2315_CR53","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1049\/iet-spr.2017.0320","volume":"12","author":"J Zhao","year":"2018","unstructured":"J. Zhao, X. Mao, L. Chen, Learning deep features to recognise speech emotion using merged deep CNN. IET Signal Proc. 12(6), 713\u2013721 (2018). https:\/\/doi.org\/10.1049\/iet-spr.2017.0320","journal-title":"IET Signal Proc."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02315-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-023-02315-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02315-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,21]],"date-time":"2023-06-21T05:08:40Z","timestamp":1687324120000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-023-02315-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,1]]},"references-count":53,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["2315"],"URL":"https:\/\/doi.org\/10.1007\/s00034-023-02315-4","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3,1]]},"assertion":[{"value":"5 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 February 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 March 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}