{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T10:13:07Z","timestamp":1767262387223,"version":"3.37.3"},"reference-count":78,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,6,21]],"date-time":"2022-06-21T00:00:00Z","timestamp":1655769600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,21]],"date-time":"2022-06-21T00:00:00Z","timestamp":1655769600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s10772-022-09981-w","type":"journal-article","created":{"date-parts":[[2022,6,21]],"date-time":"2022-06-21T18:07:39Z","timestamp":1655834859000},"page":"123-139","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A multi-modal deep learning system for Arabic emotion recognition"],"prefix":"10.1007","volume":"26","author":[{"given":"Ftoon","family":"Abu Shaqra","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8366-0949","authenticated-orcid":false,"given":"Rehab","family":"Duwairi","sequence":"additional","affiliation":[]},{"given":"Mahmoud","family":"Al-Ayyoub","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,21]]},"reference":[{"key":"9981_CR1","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1016\/j.specom.2020.04.005","volume":"122","author":"L Abdel-Hamid","year":"2020","unstructured":"Abdel-Hamid, L. (2020). Egyptian Arabic speech emotion recognition using prosodic, spectral and wavelet features. Speech Communication, 122, 19\u201330.","journal-title":"Speech Communication"},{"key":"9981_CR2","doi-asserted-by":"crossref","unstructured":"Al-Azani, S., & El-Alfy, E. S. M. (2017). Hybrid deep learning for sentiment polarity determination of Arabic microblogs. In International conference on neural information processing (pp. 491\u2013500). Springer.","DOI":"10.1007\/978-3-319-70096-0_51"},{"issue":"1","key":"9981_CR3","first-page":"364","volume":"9","author":"SO Alhumoud","year":"2015","unstructured":"Alhumoud, S. O., Altuwaijri, M. I., Albuhairi, T. M., & Alohaideb, W. M. (2015). Survey on Arabic sentiment analysis in twitter. International Science Index, 9(1), 364\u2013368.","journal-title":"International Science Index"},{"issue":"3","key":"9981_CR4","doi-asserted-by":"publisher","first-page":"358","DOI":"10.1007\/s10803-009-0884-3","volume":"40","author":"E Bal","year":"2010","unstructured":"Bal, E., Harden, E., Lamb, D., Van Hecke, A. V., Denver, J. W., & Porges, S. W. (2010). Emotion recognition in children with autism spectrum disorders: Relations to eye gaze and autonomic state. Journal of Autism and Developmental Disorders, 40(3), 358\u2013370.","journal-title":"Journal of Autism and Developmental Disorders"},{"issue":"5","key":"9981_CR5","doi-asserted-by":"publisher","first-page":"691","DOI":"10.1037\/a0017088","volume":"9","author":"T B\u00e4nziger","year":"2009","unstructured":"B\u00e4nziger, T., Grandjean, D., & Scherer, K. R. (2009). Emotion recognition from expressions in face, voice, and body: The Multimodal Emotion Recognition Test (MERT). Emotion, 9(5), 691.","journal-title":"Emotion"},{"key":"9981_CR6","first-page":"271","volume":"2010","author":"T B\u00e4nziger","year":"2010","unstructured":"B\u00e4nziger, T., & Scherer, K. R. (2010). Introducing the Geneva multimodal emotion portrayal (gemep) corpus. Blueprint for Affective Computing: A Sourcebook, 2010, 271\u2013294.","journal-title":"Blueprint for Affective Computing: A Sourcebook"},{"key":"9981_CR7","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1201\/b10368-6","volume":"20094635","author":"S Brave","year":"2009","unstructured":"Brave, S., & Nass, C. (2009). Emotion in human-computer interaction. Human-Computer Interaction Fundamentals, 20094635, 53\u201368.","journal-title":"Human-Computer Interaction Fundamentals"},{"key":"9981_CR8","doi-asserted-by":"crossref","unstructured":"Busso, C., Deng, Z., Yildirim, S., Bulut, M., Lee, C. M., Kazemzadeh, A., & Narayanan, S. (2004). Analysis of emotion recognition using facial expressions, speech and multimodal information. In Proceedings of the 6th international conference on multimodal interfaces (pp. 205\u2013211).","DOI":"10.1145\/1027933.1027968"},{"issue":"1","key":"9981_CR9","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1109\/TAFFC.2016.2515617","volume":"8","author":"C Busso","year":"2016","unstructured":"Busso, C., Parthasarathy, S., Burmania, A., AbdelWahab, M., Sadoughi, N., & Provost, E. M. (2016). MSP-IMPROV: An acted corpus of dyadic interactions to study emotion perception. IEEE Transactions on Affective Computing, 8(1), 67\u201380.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9981_CR10","doi-asserted-by":"crossref","unstructured":"Buyukyilmaz, M., & Cibikdiken, A. O. (2016). Voice gender recognition using deep learning. In 2016 international conference on modeling, simulation and optimization technologies and applications (MSOTA2016). Atlantis Press.","DOI":"10.2991\/msota-16.2016.90"},{"issue":"4","key":"9981_CR11","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1109\/TAFFC.2014.2336244","volume":"5","author":"H Cao","year":"2014","unstructured":"Cao, H., Cooper, D. G., Keutmann, M. K., Gur, R. C., Nenkova, A., & Verma, R. (2014). CREMA-D: Crowd-sourced emotional multimodal actors dataset. IEEE Transactions on Affective Computing, 5(4), 377\u2013390.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9981_CR12","doi-asserted-by":"crossref","unstructured":"Castellano, G., Kessous, L., & Caridakis, G. (2008). Emotion recognition through multiple modalities: face, body gesture, speech. In\u00a0Affect and emotion in human-computer interaction\u00a0(pp. 92\u2013103). Springer.","DOI":"10.1007\/978-3-540-85099-1_8"},{"key":"9981_CR13","unstructured":"Chen, H. B. (1998).\u00a0Detection and transmission of facial expression for low speed web-based teaching\u00a0(Doctoral dissertation, Thesis for Degree of Bachelor of Engineering, National University of Singapore)."},{"issue":"3","key":"9981_CR14","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1109\/TAFFC.2018.2807817","volume":"11","author":"N Colneri\u0109","year":"2018","unstructured":"Colneri\u0109, N., & Demsar, J. (2018). Emotion recognition on twitter: Comparative study and training a unison model. IEEE Transactions on Affective Computing., 11(3), 433\u2013446.","journal-title":"IEEE Transactions on Affective Computing."},{"key":"9981_CR15","unstructured":"De Silva, L. C., & Ng, P. C. (2000, March). Bimodal emotion recognition. In\u00a0Proceedings fourth IEEE international conference on automatic face and gesture recognition (Cat. No. PR00580)\u00a0(pp. 332\u2013335). IEEE."},{"key":"9981_CR16","unstructured":"Dhall, A., Goecke, R., Lucey, S., & Gedeon, T. (2011). Acted facial expressions in the wild database.\u00a0Australian National University, Canberra, Australia, Technical Report TR-CS-11,\u00a02, 1.\u200f"},{"key":"9981_CR17","unstructured":"Dupuis, K., & Pichora-Fuller, M. K. (2010). Toronto emotional speech set (TESS). University of Toronto."},{"key":"9981_CR18","doi-asserted-by":"crossref","unstructured":"Ebrahimi Kahou, S., Michalski, V., Konda, K., Memisevic, R., & Pal, C. (2015). Recurrent neural networks for emotion recognition in video. In\u00a0Proceedings of the 2015 ACM international conference on multimodal interaction\u00a0(pp. 467\u2013474).","DOI":"10.1145\/2818346.2830596"},{"issue":"3","key":"9981_CR19","doi-asserted-by":"publisher","first-page":"550","DOI":"10.1037\/0033-295X.99.3.550","volume":"99","author":"P Ekman","year":"1992","unstructured":"Ekman, P. (1992a). Are there basic emotions? Psychological Review, 99(3), 550\u2013553.","journal-title":"Psychological Review"},{"issue":"3\u20134","key":"9981_CR20","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1080\/02699939208411068","volume":"6","author":"P Ekman","year":"1992","unstructured":"Ekman, P. (1992b). An argument for basic emotions. Cognition & Emotion, 6(3\u20134), 169\u2013200.","journal-title":"Cognition & Emotion"},{"key":"9981_CR21","doi-asserted-by":"publisher","first-page":"118","DOI":"10.3389\/fpsyg.2013.00118","volume":"4","author":"JB Engelmann","year":"2013","unstructured":"Engelmann, J. B., & Pogosyan, M. (2013). Emotion perception across cultures: The role of cognitive mechanisms. Frontiers in Psychology, 4, 118.","journal-title":"Frontiers in Psychology"},{"issue":"2","key":"9981_CR22","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","volume":"7","author":"F Eyben","year":"2015","unstructured":"Eyben, F., Scherer, K. R., Schuller, B. W., Sundberg, J., Andr\u00e9, E., Busso, C., & Truong, K. P. (2015). The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE Transactions on Affective Computing, 7(2), 190\u2013202.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9981_CR23","doi-asserted-by":"crossref","unstructured":"Grimm, M., Kroschel, K., & Narayanan, S. (2008, June). The Vera am Mittag German audio-visual emotional speech database. In\u00a02008 IEEE international conference on multimedia and expo\u00a0(pp. 865\u2013868). IEEE.","DOI":"10.1109\/ICME.2008.4607572"},{"key":"9981_CR24","unstructured":"Hannun, A., Case, C., Casper, J., Catanzaro, B., Diamos, G., Elsen, E., et al. (2014). Deep speech: Scaling up end-to-end speech recognition.\u00a0arXiv preprint, arXiv:1412.5567"},{"key":"9981_CR25","doi-asserted-by":"crossref","unstructured":"Hifny, Y., & Ali, A. (2019). Efficient Arabic emotion recognition using deep neural networks. In\u00a0Proceedings of the IEEE international conference on acoustics, speech and signal processing (ICASSP-2019) (pp. 6710\u20136714).","DOI":"10.1109\/ICASSP.2019.8683632"},{"key":"9981_CR26","unstructured":"Horvat, M., Popovi\u0107, S., & Cosi\u0107, K. (2013). Multimedia stimuli databases usage patterns: A survey report. In\u00a0The 36th international convention on information and communication technology, electronics and microelectronics (MIPRO)\u00a0(pp. 993\u2013997). IEEE.\u200f"},{"issue":"5","key":"9981_CR27","doi-asserted-by":"publisher","first-page":"1787","DOI":"10.1007\/s12652-017-0644-8","volume":"10","author":"Y Huang","year":"2019","unstructured":"Huang, Y., Tian, K., Wu, A., & Zhang, G. (2019). Feature fusion methods research based on deep belief networks for speech emotion recognition under noise condition. Journal of Ambient Intelligence and Humanized Computing, 10(5), 1787\u20131798.","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"18","key":"9981_CR28","doi-asserted-by":"publisher","first-page":"1543","DOI":"10.1016\/j.cub.2009.07.051","volume":"19","author":"RE Jack","year":"2009","unstructured":"Jack, R. E., Blais, C., Scheepers, C., Schyns, P. G., & Caldara, R. (2009). Cultural confusions show that facial expressions are not universal. Current Biology, 19(18), 1543\u20131548.","journal-title":"Current Biology"},{"key":"9981_CR29","unstructured":"Jackson, P., & Haq, S. (2014). Surrey audio-visual expressed emotion (savee) database. University of Surrey."},{"key":"9981_CR30","unstructured":"Kadiri, S. R., Gangamohan, P., Mittal, V. K., & Yegnanarayana, B. (2014, December). Naturalistic audio-visual emotion database. In\u00a0Proceedings of the 11th international conference on natural language processing\u00a0(pp. 206\u2013213).\u200f"},{"issue":"2","key":"9981_CR31","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s12193-015-0195-2","volume":"10","author":"SE Kahou","year":"2016","unstructured":"Kahou, S. E., Bouthillier, X., Lamblin, P., Gulcehre, C., Michalski, V., Konda, K., & Ferrari, R. C. (2016). Emonets: Multimodal deep learning approaches for emotion recognition in video. Journal on Multimodal User Interfaces, 10(2), 99\u2013111.","journal-title":"Journal on Multimodal User Interfaces"},{"issue":"4","key":"9981_CR32","doi-asserted-by":"publisher","first-page":"1041","DOI":"10.1016\/j.eswa.2013.07.101","volume":"41","author":"D Kang","year":"2014","unstructured":"Kang, D., & Park, Y. (2014). Based measurement of customer satisfaction in mobile service: Sentiment analysis and VIKOR approach. Expert Systems with Applications, 41(4), 1041\u20131050.","journal-title":"Expert Systems with Applications"},{"issue":"7","key":"9981_CR33","doi-asserted-by":"publisher","first-page":"1197","DOI":"10.1007\/s00779-015-0842-3","volume":"19","author":"E Kanjo","year":"2015","unstructured":"Kanjo, E., Al-Husain, L., & Chamberlain, A. (2015). Emotions in context: Examining pervasive affective sensing systems, applications, and analyses. Personal and Ubiquitous Computing, 19(7), 1197\u20131212.","journal-title":"Personal and Ubiquitous Computing"},{"key":"9981_CR34","doi-asserted-by":"crossref","unstructured":"Kao, E. C. C., Liu, C. C., Yang, T. H., Hsieh, C. T., & Soo, V. W. (2009). Towards text-based emotion detection a survey and possible improvements. In\u00a02009 International conference on information management and engineering\u00a0(pp. 70\u201374). IEEE.","DOI":"10.1109\/ICIME.2009.113"},{"issue":"2","key":"9981_CR35","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1086\/227461","volume":"87","author":"TD Kemper","year":"1981","unstructured":"Kemper, T. D. (1981). Social constructionist and positivist approaches to the sociology of emotions. American Journal of Sociology, 87(2), 336\u2013362.","journal-title":"American Journal of Sociology"},{"key":"9981_CR36","doi-asserted-by":"crossref","unstructured":"Khasawneh, R. T., Wahsheh, H. A., Alsmadi, I. M., & AI-Kabi, M. N. (2015). Arabic sentiment polarity identification using a hybrid approach. In\u00a02015 6th international conference on information and communication systems (ICICS)\u00a0(pp. 148\u2013153). IEEE.","DOI":"10.1109\/IACS.2015.7103218"},{"key":"9981_CR37","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-019-01456-2","author":"Y Kim","year":"2019","unstructured":"Kim, Y., Moon, J., Sung, N. J., & Hong, M. (2019). Correlation between selected gait variables and emotion using virtual reality. Journal of Ambient Intelligence and Humanized Computing. https:\/\/doi.org\/10.1007\/s12652-019-01456-2","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"2","key":"9981_CR38","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1121\/1.398894","volume":"87","author":"DH Klatt","year":"1990","unstructured":"Klatt, D. H., & Klatt, L. C. (1990). Analysis, synthesis, and perception of voice quality variations among female and male talkers. The Journal of the Acoustical Society of America, 87(2), 820\u2013857.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"2","key":"9981_CR39","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/s10470-018-1142-4","volume":"96","author":"S Klaylat","year":"2018","unstructured":"Klaylat, S., Osman, Z., Hamandi, L., & Zantout, R. (2018). Emotion recognition in Arabic speech. Analog Integrated Circuits and Signal Processing, 96(2), 337\u2013351.","journal-title":"Analog Integrated Circuits and Signal Processing"},{"issue":"1","key":"9981_CR40","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/T-AFFC.2011.15","volume":"3","author":"S Koelstra","year":"2011","unstructured":"Koelstra, S., Muhl, C., Soleymani, M., Lee, J. S., Yazdani, A., Ebrahimi, T., & Patras, I. (2011). Deap: A database for emotion analysis; using physiological signals. IEEE Transactions on Affective Computing, 3(1), 18\u201331.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9981_CR41","doi-asserted-by":"crossref","unstructured":"Ko\u0142akowska, A., Landowska, A., Szwoch, M., Szwoch, W., & Wrobel, M. R. (2014). Emotion recognition and its applications. In Human-computer systems interaction: Backgrounds and applications 3\u00a0(pp. 51\u201362). Springer.","DOI":"10.1007\/978-3-319-08491-6_5"},{"key":"9981_CR42","doi-asserted-by":"crossref","unstructured":"Koolagudi, S. G., Maity, S., Kumar, V. A., Chakrabarti, S., & Rao, K. S. (2009). IITKGP-SESC: speech database for emotion analysis. In\u00a0International conference on contemporary computing\u00a0(pp. 485\u2013492). Springer.","DOI":"10.1007\/978-3-642-03547-0_46"},{"key":"9981_CR43","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems, 25, 1097\u20131105.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"9981_CR80","unstructured":"Legge, J. (1885). The sacred books of china, the texts of confucianism. Translated by James Legge. Oxford: Clarendon Press."},{"issue":"1","key":"9981_CR44","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.csl.2012.01.008","volume":"27","author":"M Li","year":"2013","unstructured":"Li, M., Han, K. J., & Narayanan, S. (2013). Automatic speaker age and gender recognition using acoustic and prosodic level information fusion. Computer Speech & Language, 27(1), 151\u2013167.","journal-title":"Computer Speech & Language"},{"key":"9981_CR45","doi-asserted-by":"publisher","unstructured":"Liu, M., Wang, R., Li, S., Shan, S., Huang, Z., & Chen, X. (2014). Combining multiple kernel methods on riemannian manifold for emotion recognition in the wild. In Proceedings of the 16th international conference on multimodal interaction (pp. 494\u2013501). https:\/\/doi.org\/10.1145\/2663204.2666274.","DOI":"10.1145\/2663204.2666274"},{"key":"9981_CR46","doi-asserted-by":"crossref","unstructured":"Liu, Y., Sourina, O., & Nguyen, M. K. (2011). Real-time EEG-based emotion recognition and its applications. In\u00a0Transactions on computational science XII\u00a0(pp. 256\u2013277). Springer.","DOI":"10.1007\/978-3-642-22336-5_13"},{"issue":"5","key":"9981_CR47","doi-asserted-by":"publisher","first-page":"e0196391","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"Livingstone, S. R., & Russo, F. A. (2018). The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English. PLoS ONE, 13(5), e0196391.","journal-title":"PLoS ONE"},{"key":"9981_CR48","doi-asserted-by":"publisher","unstructured":"Martin, O., Kotsia, I., Macq, B., & Pitas, I. (2006). The eNTERFACE'05 audio-visual emotion database. In\u00a022nd international conference on data engineering workshops (ICDEW'06) (pp. 8\u20138), 2006. https:\/\/doi.org\/10.1109\/ICDEW.2006.145.","DOI":"10.1109\/ICDEW.2006.145"},{"issue":"4","key":"9981_CR49","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1177\/1094670502004004004","volume":"4","author":"AS Mattila","year":"2002","unstructured":"Mattila, A. S., & Enz, C. A. (2002). The role of emotions in service encounters. Journal of Service Research, 4(4), 268\u2013277.","journal-title":"Journal of Service Research"},{"issue":"1","key":"9981_CR50","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1109\/T-AFFC.2011.20","volume":"3","author":"G McKeown","year":"2011","unstructured":"McKeown, G., Valstar, M., Cowie, R., Pantic, M., & Schroder, M. (2011). The semaine database: Annotated multimodal records of emotionally colored conversations between a person and a limited agent. IEEE Transactions on Affective Computing, 3(1), 5\u201317.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9981_CR51","doi-asserted-by":"crossref","unstructured":"Meddeb, M., Karray, H., & Alimi, A. M. (2015). Speech emotion recognition based on Arabic features. In\u00a02015 15th international conference on intelligent systems design and applications (ISDA)\u00a0(pp. 46\u201351). IEEE.\u200f","DOI":"10.1109\/ISDA.2015.7489165"},{"issue":"3","key":"9981_CR52","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1007\/s10772-017-9422-4","volume":"20","author":"D Najar","year":"2017","unstructured":"Najar, D., & Mesfar, S. (2017). Opinion mining and sentiment analysis for Arabic on-line texts: Application on the political domain. International Journal of Speech Technology, 20(3), 575\u2013585.","journal-title":"International Journal of Speech Technology"},{"key":"9981_CR53","doi-asserted-by":"crossref","unstructured":"Paleari, M., Huet, B., & Chellali, R. (2010, July). Towards multimodal emotion recognition: a new approach. In\u00a0Proceedings of the ACM international conference on image and video retrieval\u00a0(pp. 174\u2013181).","DOI":"10.1145\/1816041.1816069"},{"key":"9981_CR54","unstructured":"Parmar, D. N., & Mehta, B. B. (2014). Face recognition methods & applications.\u00a0arXiv preprint arXiv:1403.0485."},{"key":"9981_CR55","unstructured":"Petrushin, V. (1999). Emotion in speech: Recognition and application to call centers. In\u00a0Proceedings of artificial neural networks in engineering, pp. 7\u201310."},{"key":"9981_CR56","doi-asserted-by":"crossref","unstructured":"Petrushin, V. A. (2000). Emotion recognition in speech signal: experimental study, development, and application. In\u00a0Sixth international conference on spoken language processing.","DOI":"10.21437\/ICSLP.2000-791"},{"key":"9981_CR57","unstructured":"Plutchik, R. (1984). Emotions: A general psychoevolutionary theory. In K. R. Scherer & P. Ekman (Eds.), Approaches to emotion (pp. 197\u2013219). Erlbaum."},{"issue":"4","key":"9981_CR58","doi-asserted-by":"publisher","first-page":"344","DOI":"10.1511\/2001.28.344","volume":"89","author":"R Plutchik","year":"2001","unstructured":"Plutchik, R. (2001). The nature of emotions: Human emotions have deep evolutionary roots, a fact that may explain their complexity and provide tools for clinical practice. American Scientist, 89(4), 344\u2013350.","journal-title":"American Scientist"},{"key":"9981_CR59","doi-asserted-by":"crossref","unstructured":"Ranganathan, H., Chakraborty, S., & Panchanathan, S. (2016). Multimodal emotion recognition using deep learning architectures. In\u00a02016 IEEE winter conference on applications of computer vision (WACV)\u00a0(pp. 1\u20139). IEEE.","DOI":"10.1109\/WACV.2016.7477679"},{"key":"9981_CR60","doi-asserted-by":"crossref","unstructured":"Ringeval, F., Sonderegger, A., Sauer, J., & Lalanne, D. (2013). Introducing the RECOLA multimodal corpus of remote collaborative and affective interactions. In\u00a02013 10th IEEE international conference and workshops on automatic face and gesture recognition (FG)\u00a0(pp. 1\u20138). IEEE.\u200f","DOI":"10.1109\/FG.2013.6553805"},{"key":"9981_CR61","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-019-01280-8","author":"LO Sawada","year":"2019","unstructured":"Sawada, L. O., Mano, L. Y., Neto, J. R. T., & Ueyama, J. (2019). A module-based framework to emotion recognition by speech: A case study in clinical simulation. Journal of Ambient Intelligence and Humanized Computing. https:\/\/doi.org\/10.1007\/s12652-019-01280-8","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"key":"9981_CR62","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.procs.2019.04.009","volume":"151","author":"FA Shaqra","year":"2019","unstructured":"Shaqra, F. A., Duwairi, R., & Al-Ayyoub, M. (2019a). Recognizing emotion from speech based on age and gender using hierarchical models. Procedia Computer Science, 151, 37\u201344.","journal-title":"Procedia Computer Science"},{"key":"9981_CR63","doi-asserted-by":"crossref","unstructured":"Shaqra, F. A., Duwairi, R., & Al-Ayyoub, M. (2019b, August). The Audio-Visual Arabic Dataset for Natural Emotions. In\u00a02019b 7th international conference on future internet of things and cloud (FiCloud)\u00a0(pp. 324\u2013329). IEEE.","DOI":"10.1109\/FiCloud.2019.00054"},{"key":"9981_CR64","doi-asserted-by":"crossref","unstructured":"Soleymani, M., Chanel, G., Kierkels, J. J., & Pun, T. (2008). Affective characterization of movie scenes based on multimedia content analysis and user's physiological emotional responses. In\u00a02008 Tenth IEEE international symposium on multimedia\u00a0(pp. 228\u2013235). IEEE.\u200f","DOI":"10.1109\/ISM.2008.14"},{"issue":"1","key":"9981_CR65","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/T-AFFC.2011.25","volume":"3","author":"M Soleymani","year":"2011","unstructured":"Soleymani, M., Lichtenauer, J., Pun, T., & Pantic, M. (2011). A multimodal database for affect recognition and implicit tagging. IEEE Transactions on Affective Computing, 3(1), 42\u201355.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9981_CR66","unstructured":"Suarez, M. T., Cu, J., & Sta, M. (2012). Building a multimodal laughter database for emotion recognition. In\u00a0LREC,\u00a0(pp. 2347\u20132350).\u200f"},{"key":"9981_CR67","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., Sermanet, P., Reed, S., Anguelov, D., & Rabinovich, A. (2015). Going deeper with convolutions. In\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition,\u00a0(pp. 1\u20139).\u200f","DOI":"10.1109\/CVPR.2015.7298594"},{"issue":"4","key":"9981_CR68","doi-asserted-by":"publisher","first-page":"1699","DOI":"10.1121\/1.397959","volume":"85","author":"IR Titze","year":"1989","unstructured":"Titze, I. R. (1989). Physiologic and acoustic differences between male and female voices. The Journal of the Acoustical Society of America, 85(4), 1699\u20131707.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9981_CR69","doi-asserted-by":"crossref","unstructured":"Tokuno, S., Tsumatori, G., Shono, S., Takei, E., Yamamoto, T., Suzuki, G., & Shimura, M. (2011). Usage of emotion recognition in military health care. In\u00a02011 defense science research conference and expo (DSR)\u00a0(pp. 1\u20135). IEEE.","DOI":"10.1109\/DSR.2011.6026823"},{"issue":"8","key":"9981_CR70","doi-asserted-by":"publisher","first-page":"1301","DOI":"10.1109\/JSTSP.2017.2764438","volume":"11","author":"P Tzirakis","year":"2017","unstructured":"Tzirakis, P., Trigeorgis, G., Nicolaou, M. A., Schuller, B. W., & Zafeiriou, S. (2017). End-to-end multimodal emotion recognition using deep neural networks. IEEE Journal of Selected Topics in Signal Processing, 11(8), 1301\u20131309.","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"issue":"3\u20134","key":"9981_CR71","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1007\/s00779-018-01195-9","volume":"23","author":"Y Wang","year":"2019","unstructured":"Wang, Y. (2019). Multimodal emotion recognition algorithm based on edge network emotion element compensation and data fusion. Personal and Ubiquitous Computing, 23(3\u20134), 383\u2013392.","journal-title":"Personal and Ubiquitous Computing"},{"key":"9981_CR72","doi-asserted-by":"publisher","DOI":"10.1017\/ATSIP.2014.11","author":"CH Wu","year":"2014","unstructured":"Wu, C. H., Lin, J. C., & Wei, W. L. (2014). Survey on audiovisual emotion recognition: Databases, features, and data fusion strategies. APSIPA Transactions on Signal and Information Processing. https:\/\/doi.org\/10.1017\/ATSIP.2014.11","journal-title":"APSIPA Transactions on Signal and Information Processing"},{"issue":"14","key":"9981_CR73","doi-asserted-by":"publisher","first-page":"4913","DOI":"10.3390\/s21144913","volume":"21","author":"B Xie","year":"2021","unstructured":"Xie, B., Sidulova, M., & Park, C. H. (2021). Robust multimodal emotion recognition from conversation with transformer-based crossmodality fusion. Sensors, 21(14), 4913.","journal-title":"Sensors"},{"key":"9981_CR74","doi-asserted-by":"crossref","unstructured":"Yu, Z., & Zhang, C. (2015). Image based static facial expression recognition with multiple deep network learning. In\u00a0Proceedings of the 2015 ACM on international conference on multimodal interaction\u00a0(pp. 435\u2013442).\u200f","DOI":"10.1145\/2818346.2830595"},{"key":"9981_CR75","doi-asserted-by":"crossref","unstructured":"Zeiler, M. D., & Fergus, R. (2014). Visualizing and understanding convolutional networks. In\u00a0European conference on computer vision\u00a0(pp. 818\u2013833). Springer.\u200f","DOI":"10.1007\/978-3-319-10590-1_53"},{"issue":"1","key":"9981_CR76","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1109\/TPAMI.2008.52","volume":"31","author":"Z Zeng","year":"2008","unstructured":"Zeng, Z., Pantic, M., Roisman, G. I., & Huang, T. S. (2008). A survey of affect recognition methods: Audio, visual, and spontaneous expressions. IEEE Transactions on Pattern Analysis and Machine Intelligence, 31(1), 39\u201358.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"9981_CR77","unstructured":"Zhong-xiu, S. O. N. G. (2008). An assessment of James Legge's translation of culturally-loaded words in the book of rites.\u00a0Journal of Sanming University, (pp. 301\u201330)."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09981-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-022-09981-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-022-09981-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,27]],"date-time":"2024-09-27T10:32:27Z","timestamp":1727433147000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-022-09981-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,21]]},"references-count":78,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["9981"],"URL":"https:\/\/doi.org\/10.1007\/s10772-022-09981-w","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2022,6,21]]},"assertion":[{"value":"29 June 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 May 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}