{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T15:56:29Z","timestamp":1776268589420,"version":"3.50.1"},"reference-count":110,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,4,11]],"date-time":"2025-04-11T00:00:00Z","timestamp":1744329600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,4,11]],"date-time":"2025-04-11T00:00:00Z","timestamp":1744329600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Khalifa University of Science and Technology","award":["CIRA-2020-031"],"award-info":[{"award-number":["CIRA-2020-031"]}]},{"name":"Khalifa University of Science and Technology","award":["CIRA-2020-031"],"award-info":[{"award-number":["CIRA-2020-031"]}]},{"name":"Khalifa University of Science and Technology","award":["CIRA-2020-031"],"award-info":[{"award-number":["CIRA-2020-031"]}]},{"name":"Khalifa University of Science and Technology","award":["CIRA-2020-031"],"award-info":[{"award-number":["CIRA-2020-031"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-025-11197-8","type":"journal-article","created":{"date-parts":[[2025,4,11]],"date-time":"2025-04-11T14:14:42Z","timestamp":1744380882000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Speech emotion recognition in conversations using artificial intelligence: a systematic review and meta-analysis"],"prefix":"10.1007","volume":"58","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6181-8306","authenticated-orcid":false,"given":"Ghada","family":"Alhussein","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0615-322X","authenticated-orcid":false,"given":"Ioannis","family":"Ziogas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shiza","family":"Saleem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9932-9302","authenticated-orcid":false,"given":"Leontios J.","family":"Hadjileontiadis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,4,11]]},"reference":[{"key":"11197_CR1","doi-asserted-by":"crossref","unstructured":"Alhussein G, Alkhodari M, Khandokher A, Hadjileontiadis LJ (2022) Emotional climate recognition in interactive conversational speech using deep learning. In 2022 IEEE International Conference on Digital Health (ICDH), pages 96\u2013103","DOI":"10.1109\/ICDH55609.2022.00023"},{"key":"11197_CR2","doi-asserted-by":"crossref","unstructured":"Arumugam B, Bhattacharjee SD, Yuan J (2022) Multimodal attentive learning for real-time explainable emotion recognition in conversations. In 2022 IEEE International Symposium on Circuits and Systems (ISCAS), pages 1210\u20131214. IEEE","DOI":"10.1109\/ISCAS48785.2022.9938005"},{"key":"11197_CR3","first-page":"12449","volume":"33","author":"A Baevski","year":"2020","unstructured":"Baevski A, Zhou Y, Mohamed A, Auli M (2020) wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv Neural Inform Proc Syst 33:12449\u201312460","journal-title":"Adv Neural Inform Proc Syst"},{"key":"11197_CR4","unstructured":"Balestriero R, Ibrahim M, Sobal V, Morcos A, Shekhar S, Goldstein T, Bordes F, Bardes A, Mialon G, Tian Y, Schwarzschild A, Wilson AG, Geiping J, Garrido Q, Fernandez P, Bar A, Pirsiavash H, LeCun Y, Goldblum M (2023) A cookbook of self-supervised learning. arXiv preprint arXiv:2304.12210."},{"key":"11197_CR5","volume-title":"Learners\u2019 stories: difference and diversity in language learning","author":"P Benson","year":"2005","unstructured":"Benson P, Nunan D (2005) Learners\u2019 stories: difference and diversity in language learning. Cambridge University Press"},{"issue":"4","key":"11197_CR6","doi-asserted-by":"crossref","first-page":"383","DOI":"10.1093\/oxfordjournals.aje.a117645","volume":"142","author":"JA Berlin","year":"1995","unstructured":"Berlin JA (1995) Invited commentary: benefits of heterogeneity in meta-analysis of data from epidemiologic studies. Am J Epidemiol 142(4):383\u2013387","journal-title":"Am J Epidemiol"},{"issue":"7247","key":"11197_CR7","doi-asserted-by":"crossref","first-page":"1468","DOI":"10.1136\/bmj.320.7247.1468","volume":"320","author":"JM Bland","year":"2000","unstructured":"Bland JM, Altman DG (2000) The odds ratio. BMJ 320(7247):1468","journal-title":"BMJ"},{"key":"11197_CR8","doi-asserted-by":"crossref","unstructured":"Boateng G, Sels L, Kuppens P, Hilpert P, Kowatsch T (2020) Speech emotion recognition among couples using the peak-end rule and transfer learning. ICMI 2020 Companion - Companion Publication of the 2020 International Conference on Multimodal Interaction, pages 17\u201321","DOI":"10.1145\/3395035.3425253"},{"key":"11197_CR9","unstructured":"Buolamwini J, Gebru T (2018) Gender shades: Intersectional accuracy disparities in commercial gender classification. Proceedings of the 1st Conference on Fairness, Accountability, and Transparency, pages 77\u201391"},{"key":"11197_CR10","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso C, Bulut M, Lee C-C, Kazemzadeh A, Mower E, Kim S, Chang JN, Lee S, Narayanan SS (2008) Iemocap: interactive emotional dyadic motion capture database. Language Resour Evaluat 42:335\u2013359","journal-title":"Language Resour Evaluat"},{"key":"11197_CR11","doi-asserted-by":"crossref","first-page":"587","DOI":"10.1016\/j.procs.2016.08.239","volume":"96","author":"R Chakraborty","year":"2016","unstructured":"Chakraborty R, Pandharipande M, Kopparapu SK (2016) Knowledge-based framework for intelligent emotion recognition in spontaneous speech. Proc Computer Sci 96:587\u2013596","journal-title":"Proc Computer Sci"},{"issue":"6","key":"11197_CR12","doi-asserted-by":"crossref","first-page":"1505","DOI":"10.1109\/JSTSP.2022.3188113","volume":"16","author":"S Chen","year":"2022","unstructured":"Chen S, Wang C, Chen Z, Wu Y, Liu S, Chen Z, Li J, Kanda N, Yoshioka T, Xiao X, Wu J, Zhou L, Ren S, Qian Y, Qian Y, Wu J, Zeng M, Yu X, Wei F (2022) WavLM: large-scale self-supervised pre-training for full stack speech processing. IEEE J Select Topics Signal Proc 16(6):1505\u20131518","journal-title":"IEEE J Select Topics Signal Proc"},{"key":"11197_CR13","doi-asserted-by":"crossref","unstructured":"Chen F, Sun Z, Ouyang D, Liu X, Shao J (2021) Learning what and when to drop: Adaptive multimodal and contextual dynamics for emotion recognition in conversation. In Proceedings of the 29th ACM International Conference on Multimedia, pages 1064\u20131073","DOI":"10.1145\/3474085.3475661"},{"key":"11197_CR14","doi-asserted-by":"crossref","unstructured":"Chien W-S, Upadhyay SG, Lin W-C, Wu Y-T, Su B-H, Busso C, Lee C-C (2022) Monologue versus conversation: Differences in emotion perception and acoustic expressivity. In 2022 10th International Conference on Affective Computing and Intelligent Interaction (ACII), pages 1\u20137. IEEE","DOI":"10.1109\/ACII55700.2022.9953814"},{"key":"11197_CR15","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1093\/biomet\/37.3-4.256","volume":"37","author":"WG Cochran","year":"1950","unstructured":"Cochran WG (1950) The comparison of percentages in matched samples. Biometrika 37:256\u2013266","journal-title":"Biometrika"},{"issue":"3","key":"11197_CR16","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1007\/BF02310555","volume":"16","author":"LJ Cronbach","year":"1951","unstructured":"Cronbach LJ (1951) Coefficient alpha and the internal structure of tests. Psychometrika 16(3):297\u2013334","journal-title":"Psychometrika"},{"key":"11197_CR17","unstructured":"Dattani HRS, Roser M (2021) Mental health. Our World in Data, 2021. https:\/\/ourworldindata.org\/mental-health"},{"key":"11197_CR18","unstructured":"U.S.\u00a0Department of\u00a0Health and Human Services (1996) Health insurance portability and accountability act of 1996"},{"key":"11197_CR19","doi-asserted-by":"crossref","unstructured":"Deschamps-Berger T, Lamel L, Devillers L (2021) End-to-end speech emotion recognition: challenges of real-life emergency call centers data recordings. In 2021 9th International Conference on Affective Computing and Intelligent Interaction (ACII), pages 1\u20138. IEEE","DOI":"10.1109\/ACII52823.2021.9597419"},{"key":"11197_CR20","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1126\/science.164.3875.86","volume":"164","author":"P Ekman","year":"1969","unstructured":"Ekman P, Sorenson ER, Friesen WV (1969) Pan-cultural elements in facial displays of emotion. Science 164:86\u201388","journal-title":"Science"},{"key":"11197_CR21","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1016\/0010-0277(92)90002-Y","volume":"44","author":"NL Etcoff","year":"1992","unstructured":"Etcoff NL, Magee JJ (1992) Categorical perception of facial expressions. Cognition 44:227\u2013240","journal-title":"Cognition"},{"key":"11197_CR22","doi-asserted-by":"crossref","first-page":"1803","DOI":"10.1109\/TASLP.2022.3171965","volume":"30","author":"W Fan","year":"2022","unstructured":"Fan W, Xiangmin X, Cai B, Xing X (2022) Isnet: Individual standardization network for speech emotion recognition. IEEE\/ACM Trans Audio Speech Language Proc 30:1803\u20131814","journal-title":"IEEE\/ACM Trans Audio Speech Language Proc"},{"key":"11197_CR23","doi-asserted-by":"crossref","unstructured":"Fan W, Xu X, Xing X, Chen W, Huang D (2021) Lssed: a large-scale dataset and benchmark for speech emotion recognition. In ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pages 641\u2013645. IEEE","DOI":"10.1109\/ICASSP39728.2021.9414542"},{"key":"11197_CR24","doi-asserted-by":"crossref","DOI":"10.4324\/9781315086071","volume-title":"The laws of emotion","author":"NH Frijda","year":"2017","unstructured":"Frijda NH (2017) The laws of emotion. Psychology Press"},{"key":"11197_CR25","doi-asserted-by":"crossref","unstructured":"Galanis D, Karabetsos S, Koutsombogera M, Papageorgiou H, Esposito A, Riviello MT (2013) Classification of emotional speech units in call centre interactions. 4th IEEE International Conference on Cognitive Infocommunications, CogInfoCom 2013 - Proceedings, pages 403\u2013406","DOI":"10.1109\/CogInfoCom.2013.6719279"},{"key":"11197_CR26","unstructured":"Gliem JA, Gliem RR (2003) Calculating, interpreting, and reporting cronbach\u2019s alpha reliability coefficient for likert-type scales. In Midwest Research-to-Practice Conference in Adult, Continuing, and Community Education"},{"key":"11197_CR27","unstructured":"Hameed IA, Sana I, Khan DI, Ishaq Z, Nasir K, Urooj A (2018) Bers: Bussiness-related emotion recognition system in urdu language using machine learning artificial intelligence for winch design view project durable interaction with socially intelligent robots (isociobot) view project bers: Bussiness-related emotion recognition system in urdu language using machine learning. 2018 5th International Conference on Behavioral, Economic, and Socio-Cultural Computing (BESC)"},{"key":"11197_CR28","first-page":"23","volume":"1","author":"P Hancock","year":"2011","unstructured":"Hancock P, Lichtenstein JTM (2011) The influence of touch on virtual agents. J Human-Robot Interact 1:23\u201335","journal-title":"J Human-Robot Interact"},{"key":"11197_CR29","doi-asserted-by":"crossref","unstructured":"Hazarika D, Poria S, Mihalcea R, Cambria E, Zimmermann R (2018) ICON: Interactive conversational memory network for multimodal emotion detection. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 2594\u20132604, Brussels, Belgium, October-November 2018. Association for Computational Linguistics","DOI":"10.18653\/v1\/D18-1280"},{"key":"11197_CR30","doi-asserted-by":"crossref","unstructured":"Hazarika D, Poria S, Zadeh A, Cambria E, Morency L-P, Zimmermann R (2018) Conversational memory network for emotion recognition in dyadic dialogue videos. In Proceedings of the conference. Association for Computational Linguistics. North American Chapter. Meeting, volume 2018, page 2122. NIH Public Access","DOI":"10.18653\/v1\/N18-1193"},{"key":"11197_CR31","doi-asserted-by":"crossref","first-page":"135","DOI":"10.15801\/je.1.124.201903.135","volume":"1","author":"J He","year":"2019","unstructured":"He J, Zeng X (2019) Surveillance technologies and ethics: perspectives from AI. AI Ethics 1:135\u2013149","journal-title":"AI Ethics"},{"key":"11197_CR32","doi-asserted-by":"crossref","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"WN Hsu","year":"2021","unstructured":"Hsu WN, Bolte B, Tsai YHH, Lakhotia K, Salakhutdinov R, Mohamed A (2021) HuBERT: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans Audio Speech Language Proc 29:3451\u20133460","journal-title":"IEEE\/ACM Trans Audio Speech Language Proc"},{"key":"11197_CR33","doi-asserted-by":"crossref","first-page":"1675","DOI":"10.1109\/TASLP.2021.3076364","volume":"29","author":"JH Hsu","year":"2021","unstructured":"Hsu JH, Su MH, Wu CH, Chen YH (2021) Speech emotion recognition considering nonverbal vocalization in affective conversations. IEEE\/ACM Trans Audio Speech Language Proc 29:1675\u20131686","journal-title":"IEEE\/ACM Trans Audio Speech Language Proc"},{"key":"11197_CR34","doi-asserted-by":"crossref","unstructured":"Huang K-Y, Wu C-H, Hong Q-B, Su M-H, Zeng Y-R (2018) Speech emotion recognition using convolutional neural network with audio word-based embedding. In 2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP), pages 265\u2013269. IEEE","DOI":"10.1109\/ISCSLP.2018.8706610"},{"key":"11197_CR35","doi-asserted-by":"crossref","unstructured":"Huang K.-Y., Wu, C.-H., Hong, Q.-B., Su, M.-H., Chen, Y.-H (2019) Speech emotion recognition using deep neural network considering verbal and nonverbal speech sounds. In ICASSP 2019 - 2019 IEEE international conference on acoustics, speech and signal processing (ICASSP)","DOI":"10.1109\/ICASSP.2019.8682283"},{"key":"11197_CR36","doi-asserted-by":"crossref","first-page":"897","DOI":"10.1007\/s10772-017-9457-6","volume":"20","author":"A Jacob","year":"2017","unstructured":"Jacob A (2017) Modelling speech emotion recognition using logistic regression and decision trees. Int J Speech Technol 20:897\u2013905","journal-title":"Int J Speech Technol"},{"key":"11197_CR37","doi-asserted-by":"crossref","unstructured":"Jamil N, Apandi F, Hamzah R (2017) Influences of age in emotion recognition of spontaneous speech: A case of an under-resourced language. In 2017 International conference on speech technology and human-computer dialogue (SpeD), pages 1\u20136. IEEE","DOI":"10.1109\/SPED.2017.7990448"},{"key":"11197_CR38","doi-asserted-by":"crossref","unstructured":"Jin X, Yu J, Ding Z, Xia R, Zhou X, Tu Y (2020) Hierarchical multimodal transformer with localness and speaker aware attention for emotion recognition in conversations. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 12431 LNAI:41\u201353","DOI":"10.1007\/978-3-030-60457-8_4"},{"issue":"1","key":"11197_CR39","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1038\/s41597-023-02248-2","volume":"10","author":"S Kang","year":"2023","unstructured":"Kang S, Choi W, Park CY, Cha N, Kim A, Khandoker AH, Hadjileontiadis L, Kim H, Jeong Y, Lee U (2023) K-emophone: a mobile and wearable dataset with in-situ emotion, stress, and attention labels. Sci Data 10(1):351","journal-title":"Sci Data"},{"key":"11197_CR40","doi-asserted-by":"crossref","unstructured":"Kharat A, Patel A, Bhatt D, Parikh N, Rathore H (2021) Emotion recognition using multimodalities. Adv Intell Syst Comput 1375 AIST:309\u2013319","DOI":"10.1007\/978-3-030-73050-5_31"},{"key":"11197_CR41","doi-asserted-by":"crossref","unstructured":"Kov\u00e1cs G (2018) Classification of formal and informal dialogues based on emotion recognition features. In Text, Speech, and Dialogue: 21st International Conference, TSD 2018, Brno, Czech Republic, September 11-14, 2018, Proceedings, pages 518\u2013526. Springer","DOI":"10.1007\/978-3-030-00794-2_56"},{"key":"11197_CR42","doi-asserted-by":"crossref","unstructured":"Kwon, Mustaqeem and Soonil (2020) Clstm: Deep feature-based speech emotion recognition using the hierarchical convlstm network. Mathematics 2020, 8:2133","DOI":"10.3390\/math8122133"},{"issue":"4","key":"11197_CR43","doi-asserted-by":"crossref","first-page":"1031","DOI":"10.3233\/IDA-205183","volume":"25","author":"H Lai","year":"2021","unstructured":"Lai H, Keke W, Li L (2021) Multimodal emotion recognition with hierarchical memory networks. Intell Data Anal 25(4):1031\u20131045","journal-title":"Intell Data Anal"},{"issue":"1","key":"11197_CR44","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1016\/j.pec.2004.12.003","volume":"60","author":"JJA Landsman-Dijkstra","year":"2006","unstructured":"Landsman-Dijkstra JJA, van Wijck R, Groothoff JW (2006) The long-term lasting effectiveness on self-efficacy, attribution style, expression of emotions and quality of life of a body awareness program for chronic a-specific psychosomatic symptoms. Patient Educ Counsel 60(1):66\u201379","journal-title":"Patient Educ Counsel"},{"issue":"2","key":"11197_CR45","doi-asserted-by":"crossref","first-page":"992","DOI":"10.1109\/TAFFC.2020.2983669","volume":"13","author":"S Latif","year":"2020","unstructured":"Latif S, Rana R, Khalifa S, Jurdak R, Epps J, Schuller BW (2020) Multi-task semi-supervised adversarial autoencoding for speech emotion recognition. IEEE Trans Affect Comput 13(2):992\u20131004","journal-title":"IEEE Trans Affect Comput"},{"key":"11197_CR46","doi-asserted-by":"crossref","first-page":"1072","DOI":"10.1080\/0144929X.2020.1741684","volume":"40","author":"ELC Law","year":"2021","unstructured":"Law ELC, Soleimani S, Watkins D, Barwick J (2021) Automatic voice emotion recognition of child-parent conversations in natural settings. Behav Inform Technol 40:1072\u20131089","journal-title":"Behav Inform Technol"},{"key":"11197_CR47","doi-asserted-by":"crossref","first-page":"4782","DOI":"10.3390\/app11114782","volume":"11","author":"HC Li","year":"2021","unstructured":"Li HC, Pan T, Lee MH, Chiu HW (2021) Make patient consultation warmer: a clinical application for speech emotion recognition. Appl Sci 11:4782","journal-title":"Appl Sci"},{"key":"11197_CR48","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1016\/j.neucom.2021.05.017","volume":"454","author":"Z Lian","year":"2021","unstructured":"Lian Z, Liu B, Tao J (2021) Decn: dialogical emotion correction network for conversational emotion recognition. Neurocomputing 454:483\u2013495","journal-title":"Neurocomputing"},{"key":"11197_CR49","unstructured":"Lian Z, Liu B, Tao J (2022) Pirnet: Personality-enhanced iterative refinement network for emotion recognition in conversation. IEEE Trans Neural Netw Learn Syst"},{"key":"11197_CR50","doi-asserted-by":"crossref","unstructured":"Lin J-C, Wei W-L, Wu C-H, Wang, H-M (2014) Emotion recognition of conversational affective speech using temporal course modeling-based error weighted cross-correlation model. In Signal and information processing association annual summit and conference (APSIPA), 2014 Asia-Pacific, pages 1\u20137. IEEE","DOI":"10.1109\/APSIPA.2014.7041621"},{"issue":"10","key":"11197_CR51","doi-asserted-by":"crossref","first-page":"36","DOI":"10.1145\/3233231","volume":"61","author":"ZC Lipton","year":"2018","unstructured":"Lipton ZC (2018) The mythos of model interpretability. Commun ACM 61(10):36\u201343","journal-title":"Commun ACM"},{"key":"11197_CR52","doi-asserted-by":"crossref","unstructured":"Li P, Song Y, McLoughlin I, Guo W, Dai L (2018) An attention pooling based representation learning method for speech emotion recognition. Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH, 2018-September:3087\u20133091","DOI":"10.21437\/Interspeech.2018-1242"},{"key":"11197_CR53","unstructured":"Liu J, Kong J, Zhang X (2022) Study on differences between patients with physiological and psychological diseases in online health communities: Topic analysis and sentiment analysis. Int J Environ Res Public Health Article"},{"key":"11197_CR54","doi-asserted-by":"crossref","unstructured":"Liu J, Song Y, Wang L, Dang J, Yu R (2021) Time-frequency representation learning with graph convolutional network for dialogue-level speech emotion recognition. In Interspeech, pages 4523\u20134527","DOI":"10.21437\/Interspeech.2021-2067"},{"key":"11197_CR55","doi-asserted-by":"crossref","unstructured":"Lu CC, Li JL, Lee CC (2018) Learning an arousal-valence speech front-end network using media data in-the-wild for emotion recognition. In Proceedings of the 2018 on Audio\/Visual Emotion Challenge and Workshop, pages 99\u2013105","DOI":"10.1145\/3266302.3266306"},{"key":"11197_CR56","doi-asserted-by":"crossref","unstructured":"Lubis N, Sakti S, Neubig G, Yoshino K, Toda T, Nakamura S (2016) A study of social-affective communication: Automatic prediction of emotion triggers and responses in television talk shows. 2015 IEEE Workshop on Automatic Speech Recognition and Understanding, ASRU 2015 - Proceedings, pages 777\u2013783","DOI":"10.1109\/ASRU.2015.7404867"},{"issue":"2","key":"11197_CR57","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1109\/TAFFC.2016.2598569","volume":"9","author":"R Malheiro","year":"2016","unstructured":"Malheiro R, Panda R, Gomes P, Paiva RP (2016) Emotionally-relevant features for classification and regression of music lyrics. IEEE Trans Affect Comput 9(2):240\u2013254","journal-title":"IEEE Trans Affect Comput"},{"key":"11197_CR58","doi-asserted-by":"crossref","first-page":"551","DOI":"10.3389\/fnhum.2013.00551","volume":"7","author":"YT Matsuda","year":"2013","unstructured":"Matsuda YT, Fujimura T, Katahira K, Okada M, Ueno K, Cheng K, Okanoya K (2013) The implicit processing of categorical and dimensional strategies: An fMRI study of facial emotion perception. Front Human Neurosci 7:551","journal-title":"Front Human Neurosci"},{"issue":"4","key":"11197_CR59","doi-asserted-by":"crossref","first-page":"388","DOI":"10.1001\/jama.2017.19163","volume":"319","author":"MDF McInnes","year":"2018","unstructured":"McInnes MDF, Moher D, Thombs BD, McGrath TA, Bossuyt PM, Clifford T, Cohen JF, Deeks JJ, Gatsonis C, Hooft L et al (2018) Preferred reporting items for a systematic review and meta-analysis of diagnostic test accuracy studies: the prisma-dta statement. JAMA 319(4):388\u2013396","journal-title":"JAMA"},{"issue":"4","key":"11197_CR60","doi-asserted-by":"crossref","first-page":"264","DOI":"10.7326\/0003-4819-151-4-200908180-00135","volume":"151","author":"D Moher","year":"2009","unstructured":"Moher D, Liberati A, Tetzlaff J, Altman DG, the PRISMA\u00a0Group* (2009) Preferred reporting items for systematic reviews and meta-analyses: the prisma statement. Annal Internal Med 151(4):264\u2013269","journal-title":"Annal Internal Med"},{"key":"11197_CR61","doi-asserted-by":"crossref","unstructured":"Nahar M, Ali ME (2022) A deep ensemble approach of anger detection from audio-textual conversations. In 2022 10th International Conference on Affective Computing and Intelligent Interaction (ACII), pages 1\u20138. IEEE","DOI":"10.1109\/ACII55700.2022.9953866"},{"key":"11197_CR62","doi-asserted-by":"crossref","unstructured":"Neumann M et\u00a0al (2018) Cross-lingual and multilingual speech emotion recognition on english and french. In 2018 IEEE international conference on acoustics, Sspeech and signal processing (ICASSP), pages 5769\u20135773. IEEE","DOI":"10.1109\/ICASSP.2018.8462162"},{"key":"11197_CR63","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s13643-016-0384-4","volume":"5","author":"M Ouzzani","year":"2016","unstructured":"Ouzzani M, Hammady H, Fedorowicz Z, Elmagarmid A (2016) Rayyan-a web and mobile app for systematic reviews. Syst Rev 5:1\u201310","journal-title":"Syst Rev"},{"key":"11197_CR64","doi-asserted-by":"crossref","unstructured":"Pappagari R, Zelasko P, Villalba J, Moro-Velazquez L, Dehak N (2021) Beyond isolated utterances: Conversational emotion recognition. 2021 IEEE automatic speech recognition and understanding workshop, ASRU 2021 - Proceedings, pages 39\u201346","DOI":"10.1109\/ASRU51503.2021.9687971"},{"issue":"1","key":"11197_CR65","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1038\/s41597-020-00630-y","volume":"7","author":"CY Park","year":"2020","unstructured":"Park CY, Cha N, Kang S, Kim A, Khandoker AH, Hadjileontiadis L, Oh A, Jeong Y, Lee U (2020) K-emocon, a multimodal sensor dataset for continuous emotion recognition in naturalistic conversations. Sci Data 7(1):293","journal-title":"Sci Data"},{"key":"11197_CR66","doi-asserted-by":"crossref","unstructured":"Poria S, Hazarika D, Majumder N, Naik G, Cambria E (2019) Meld: A multimodal multi-party dataset for emotion recognition in conversations. In Proceedings of the 20th annual SIGdial meeting on discourse and dialogue, pages 62\u201372","DOI":"10.18653\/v1\/P19-1050"},{"key":"11197_CR67","doi-asserted-by":"crossref","unstructured":"Poria S, Hazarika D, Majumder N, Naik G, Cambria E, Mihalcea R (2018) Meld: A multimodal multi-party dataset for emotion recognition in conversations. arXiv preprint arXiv:1810.02508","DOI":"10.18653\/v1\/P19-1050"},{"key":"11197_CR68","doi-asserted-by":"crossref","unstructured":"Poria S, Mazumder N, Cambria E, Hazarika D, Morency LP, Zadeh A (2017) Context-dependent sentiment analysis in user-generated videos. ACL 2017 - 55th Annual Meeting of the Association for Computational Linguistics, Proceedings of the Conference (Long Papers), 1:873\u2013883","DOI":"10.18653\/v1\/P17-1081"},{"issue":"16","key":"11197_CR69","doi-asserted-by":"crossref","first-page":"5317","DOI":"10.3390\/s21165317","volume":"21","author":"J Quan","year":"2021","unstructured":"Quan J, Miyake Y, Nozawa T (2021) Incorporating interpersonal synchronization features for automatic emotion recognition from visual and audio data during communication. Sensors 21(16):5317","journal-title":"Sensors"},{"key":"11197_CR70","doi-asserted-by":"crossref","unstructured":"Ribeiro MT, Singh S, Guestrin C (2016) Why should i trust you? explaining the predictions of any classifier. Proceedings of the 22nd ACM SIGKDD International conference on knowledge discovery and data mining, pages 1135\u20131144","DOI":"10.1145\/2939672.2939778"},{"issue":"6","key":"11197_CR71","doi-asserted-by":"crossref","first-page":"1161","DOI":"10.1037\/h0077714","volume":"39","author":"JA Russell","year":"1980","unstructured":"Russell JA (1980) A circumplex model of affect. J Personal Soc Psychol 39(6):1161","journal-title":"J Personal Soc Psychol"},{"key":"11197_CR72","doi-asserted-by":"crossref","unstructured":"Saffaryazdi N, Goonesekera Y, Saffaryazdi N, Hailemariam ND, Temesgen EG, Nanayakkara S, Broadbent E, Billinghurst M (2022) Emotion recognition in conversations using brain and physiological signals. International conference on intelligent user interfaces, Proceedings IUI, pages 229\u2013242","DOI":"10.1145\/3490099.3511148"},{"key":"11197_CR73","doi-asserted-by":"crossref","first-page":"79861","DOI":"10.1109\/ACCESS.2020.2990405","volume":"8","author":"M Sajjad","year":"2020","unstructured":"Sajjad M, Kwon S (2020) Clustering-based speech emotion recognition by incorporating learned features and deep bilstm. IEEE Access 8:79861\u201379875","journal-title":"IEEE Access"},{"key":"11197_CR74","doi-asserted-by":"crossref","unstructured":"Scherer K, Johnstone T (2003) Vocal expression of emotion. Handbook of Affective Sciences, pages 433\u2013456","DOI":"10.1093\/oso\/9780195126013.003.0023"},{"issue":"7","key":"11197_CR75","doi-asserted-by":"crossref","first-page":"1428","DOI":"10.3390\/sym14071428","volume":"14","author":"J Seo","year":"2022","unstructured":"Seo J, Lee B (2022) Multi-task conformer with multi-feature combination for speech emotion recognition. Symmetry 14(7):1428","journal-title":"Symmetry"},{"key":"11197_CR76","volume-title":"Algorithms, humans, and interactions: how do algorithms interact with people? designing meaningful AI experiences","author":"DD Shin","year":"2023","unstructured":"Shin DD (2023) Algorithms, humans, and interactions: how do algorithms interact with people? designing meaningful AI experiences. Taylor and Francis"},{"key":"11197_CR77","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1016\/j.neucom.2022.04.028","volume":"492","author":"YB Singh","year":"2022","unstructured":"Singh YB, Goel S (2022) A systematic literature review of speech emotion recognition approaches. Neurocomputing 492:245\u2013263","journal-title":"Neurocomputing"},{"issue":"2","key":"11197_CR78","doi-asserted-by":"crossref","first-page":"100410","DOI":"10.1016\/j.patter.2021.100410","volume":"3","author":"D Spathis","year":"2022","unstructured":"Spathis D, Perez-Pozuelo I, Marques-Fernandez L, Mascolo C (2022) Breaking away from labels: the promise of self-supervised machine learning in intelligent health. Patterns 3(2):100410","journal-title":"Patterns"},{"key":"11197_CR79","doi-asserted-by":"crossref","unstructured":"Sterne JAC, Hern\u00e1n MA, Reeves BC, Savovi\u0107 J, Berkman ND, Viswanathan M, Henry D, Altman DG, Ansari MT, Boutron I (2016) et\u00a0al. Robins-i: a tool for assessing risk of bias in non-randomised studies of interventions. BMJ, 355","DOI":"10.1136\/bmj.i4919"},{"issue":"2","key":"11197_CR80","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1177\/1536867X0400400204","volume":"4","author":"JAC Sterne","year":"2004","unstructured":"Sterne JAC, Harbord RM (2004) Funnel plots in meta-analysis. Stata J 4(2):127\u2013141","journal-title":"Stata J"},{"issue":"4","key":"11197_CR81","doi-asserted-by":"crossref","first-page":"537","DOI":"10.1002\/jrsm.1260","volume":"8","author":"R Suurmond","year":"2017","unstructured":"Suurmond R, van Rhee H, Hak T (2017) Introduction, comparison, and validation of meta-essentials: a free and simple tool for meta-analysis. Res Synth Methods 8(4):537\u2013553","journal-title":"Res Synth Methods"},{"key":"11197_CR82","doi-asserted-by":"crossref","unstructured":"Sweeney L (2013) Discrimination in online ad delivery. ACM Digital Library","DOI":"10.2139\/ssrn.2208240"},{"key":"11197_CR83","doi-asserted-by":"crossref","first-page":"613","DOI":"10.3389\/fpsyg.2018.00613","volume":"9","author":"B t Hart","year":"2018","unstructured":"t Hart B, Struiksma ME, van Boxtel A, Van Berkum JJ (2018) Emotion in stories: facial EMG evidence for both mental simulation and moral evaluation. Front Psychol 9:613","journal-title":"Front Psychol"},{"key":"11197_CR84","doi-asserted-by":"crossref","first-page":"1273","DOI":"10.1007\/s11165-016-9602-2","volume":"48","author":"KS Taber","year":"2018","unstructured":"Taber KS (2018) The use of cronbach\u2019s alpha when developing and reporting research instruments in science education. Res Sci Educ 48:1273\u20131296","journal-title":"Res Sci Educ"},{"key":"11197_CR85","doi-asserted-by":"crossref","first-page":"1280235","DOI":"10.3389\/fdgth.2024.1280235","volume":"6","author":"A Thakkar","year":"2024","unstructured":"Thakkar A, Gupta A, De Sousa A (2024) Artificial intelligence in positive mental health: a narrative review. Front Digital Health 6:1280235","journal-title":"Front Digital Health"},{"key":"11197_CR86","doi-asserted-by":"crossref","unstructured":"Tian L, Moore JD, Lai C (2015) Emotion recognition in spontaneous and acted dialogues. In 2015 International conference on affective computing and intelligent interaction, ACII 2015, pages 698\u2013704","DOI":"10.1109\/ACII.2015.7344645"},{"key":"11197_CR87","doi-asserted-by":"crossref","unstructured":"Tian L, Moore JD, Lai C (2015) Recognizing emotions in dialogues with acoustic and lexical features. In 2015 International conference on affective computing and intelligent interaction (ACII), pages 737\u2013742. IEEE","DOI":"10.1109\/ACII.2015.7344651"},{"issue":"6","key":"11197_CR88","doi-asserted-by":"crossref","first-page":"510","DOI":"10.1007\/s10865-009-9225-4","volume":"32","author":"J Tully Phillip","year":"2009","unstructured":"Tully Phillip J, Baker Robert A, Turnbull Deborah A, Winefield Helen R, Knight JL (2009) Negative emotions and quality of life six months after cardiac surgery: the dominant role of depression not anxiety symptoms. J Behav Med 32(6):510\u2013522","journal-title":"J Behav Med"},{"key":"11197_CR89","unstructured":"Union European. Regulation (eu) 2016\/679 of the european parliament and of the council"},{"issue":"4","key":"11197_CR90","doi-asserted-by":"crossref","first-page":"1414","DOI":"10.3390\/s22041414","volume":"22","author":"L Van Trinh","year":"2022","unstructured":"Van Trinh L et al (2022) Emotional speech recognition using deep neural networks. Sensors 22(4):1414","journal-title":"Sensors"},{"key":"11197_CR91","volume-title":"Fairness and machine learning: limitations and opportunities","author":"S Venkatasubramanian","year":"2019","unstructured":"Venkatasubramanian S, Hardt M, Arvind N (2019) Fairness and machine learning: limitations and opportunities. MIT Press"},{"key":"11197_CR92","doi-asserted-by":"crossref","unstructured":"Waelbers B, Bromuri S, Henkel AP (2022) Comparing neural networks for speech emotion recognition in customer service interactions. In 2022 International joint conference on neural networks (IJCNN), pages 1\u20138. IEEE","DOI":"10.1109\/IJCNN55064.2022.9892165"},{"issue":"9","key":"11197_CR93","doi-asserted-by":"crossref","first-page":"10745","DOI":"10.1109\/TPAMI.2023.3263585","volume":"45","author":"J Wagner","year":"2023","unstructured":"Wagner J, Triantafyllopoulos A, Wierstorf H, Schmitt M, Burkhardt F, Eyben F, Schuller BW (2023) Dawn of the transformer era in speech emotion recognition: closing the valence gap. IEEE Trans Pattern Anal Mach Intell 45(9):10745\u201310759","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"11197_CR94","doi-asserted-by":"crossref","first-page":"334","DOI":"10.1001\/jamapsychiatry.2014.2502","volume":"72","author":"ER Walker","year":"2015","unstructured":"Walker ER, McGee RE, Druss BG (2015) Mortality in mental disorders and global disease burden implications: a systematic review and meta-analysis. JAMA Psychiatr 72(4):334\u2013341","journal-title":"JAMA Psychiatr"},{"key":"11197_CR95","doi-asserted-by":"crossref","first-page":"129","DOI":"10.14257\/ijsh.2016.10.8.14","volume":"10","author":"Z Wan","year":"2016","unstructured":"Wan Z (2016) Smart home entertainment system with personalized recommendation and speech emotion recognition support. Int J Smart Home 10:129\u2013142","journal-title":"Int J Smart Home"},{"key":"11197_CR96","doi-asserted-by":"crossref","unstructured":"Wang B, Dong G, Zhao Y, Li R, Cao Q, Hu K, Jiang D (2023) Hierarchically stacked graph convolution for emotion recognition in conversation. Knowl-Based Syst, page 110285","DOI":"10.1016\/j.knosys.2023.110285"},{"key":"11197_CR97","doi-asserted-by":"crossref","unstructured":"Wang T, Hou Y, Zhou D, Zhang Q (2021) A contextual attention network for multimodal emotion recognition in conversation. In 2021 International joint conference on neural networks (IJCNN), pages 1\u20137. IEEE","DOI":"10.1109\/IJCNN52387.2021.9533718"},{"key":"11197_CR98","doi-asserted-by":"crossref","unstructured":"Wu Y-H, Lin S-J, Yang D-L (2013) A mobile emotion recognition system based on speech signals and facial images. In 2013 International computer science and engineering conference (ICSEC), pages 212\u2013217. IEEE","DOI":"10.1109\/ICSEC.2013.6694781"},{"key":"11197_CR99","doi-asserted-by":"crossref","unstructured":"Wu W, Zhang C, Woodland PC (2023) Distribution-based emotion recognition in conversation. In 2022 IEEE Spoken language technology workshop (SLT), pages 860\u2013867","DOI":"10.1109\/SLT54892.2023.10022800"},{"key":"11197_CR100","doi-asserted-by":"crossref","first-page":"6486","DOI":"10.1038\/s41598-019-42826-2","volume":"9","author":"C Xiefeng","year":"2019","unstructured":"Xiefeng C, Wang Y, Dai S, Zhao P, Liu Q (2019) Heart sound signals can be used for emotion recognition. Sci Rep 9:6486","journal-title":"Sci Rep"},{"key":"11197_CR101","doi-asserted-by":"crossref","unstructured":"Xie B, Sidulova M, Park CH (2021) Robust multimodal emotion recognition from conversation with transformer-based crossmodality fusion. Sensors, 21(14)","DOI":"10.3390\/s21144913"},{"key":"11197_CR102","doi-asserted-by":"crossref","unstructured":"Yang SW, Chi PH, Chuang YS, Lai C, Jeff, Lakhotia K, Lin YY, Liu AT, Shi J, Chang X, Lin GT, Huang TH, Tseng WC, Lee KT, Liu DR, Huang Z, Dong S, Li SW, Watanabe S, Mohamed A, Lee HY (2021) SUPERB: Speech Processing Universal PERformance Benchmark. In Proceedings of the annual conference of the Iinternational speech communication association, INTERSPEECH, 4:1194\u20131198","DOI":"10.21437\/Interspeech.2021-1775"},{"key":"11197_CR103","doi-asserted-by":"crossref","unstructured":"Yeh SL, Lin YS, Lee CC (2019) An interaction-aware attention network for speech emotion recognition in spoken dialogs. In ICASSP, IEEE International conference on acoustics, speech and Ssignal processing - Proceedings pp. 6685\u20136689","DOI":"10.1109\/ICASSP.2019.8683293"},{"key":"11197_CR104","doi-asserted-by":"crossref","first-page":"107721","DOI":"10.1016\/j.apacoust.2020.107721","volume":"173","author":"S Yildirim","year":"2021","unstructured":"Yildirim S, Kaya Y, K\u0131l\u0131\u00e7 F (2021) A modified feature selection method based on metaheuristic algorithms for speech emotion recognition. Appl Acoustics 173:107721","journal-title":"Appl Acoustics"},{"key":"11197_CR105","doi-asserted-by":"crossref","unstructured":"Yusuf SM, Adedokun EA, Mu\u2019azu MB, Umoh IJ, Ibrahim AA (2021) A novel multi-window spectrogram augmentation approach for speech emotion recognition using deep learning. In 2021 1st International conference on multidisciplinary engineering and applied science, ICMEAS 2021","DOI":"10.1109\/ASYU52992.2021.9598956"},{"key":"11197_CR106","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1016\/j.inffus.2023.01.005","volume":"93","author":"Y Zhang","year":"2023","unstructured":"Zhang Y, Wang J, Yaochen Liu L, Rong QZ, Song D, Tiwari P, Qin J (2023) A multitask learning model for multimodal sarcasm, sentiment and emotion recognition in conversations. Inform Fusion 93:282\u2013301","journal-title":"Inform Fusion"},{"key":"11197_CR107","doi-asserted-by":"crossref","unstructured":"Zhang Y, Li Q, Song D, Zhang P, Wang P (2019) Quantum-inspired interactive networks for conversational sentiment analysis. In Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI-19, pages 5436\u20135442. International Joint Conferences on Artificial Intelligence Organization, 7","DOI":"10.24963\/ijcai.2019\/755"},{"key":"11197_CR108","doi-asserted-by":"crossref","unstructured":"Zhang J, Liu Z, Liu P, Wu B (2021) Dual-waveform emotion recognition model for conversations. In 2021 IEEE International conference on multimedia and expo (ICME), pages 1\u20136. IEEE","DOI":"10.1109\/ICME51207.2021.9428327"},{"issue":"01","key":"11197_CR109","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1109\/TAFFC.2022.3175578","volume":"14","author":"K Zhou","year":"2023","unstructured":"Zhou K, Sisman B, Rana R, Schuller BW, Li H (2023) Emotion intensity and its control for emotional voice conversion. IEEE Trans Affect Comput 14(01):31\u201348","journal-title":"IEEE Trans Affect Comput"},{"key":"11197_CR110","doi-asserted-by":"crossref","first-page":"109978","DOI":"10.1016\/j.knosys.2022.109978","volume":"258","author":"SH Zou","year":"2022","unstructured":"Zou SH, Huang X, Shen XD, Liu H (2022) Improving multimodal fusion with main modal transformer for emotion recognition in conversation. Knowl-Based Syst 258:109978","journal-title":"Knowl-Based Syst"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11197-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-025-11197-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11197-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T13:40:44Z","timestamp":1747143644000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-025-11197-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,11]]},"references-count":110,"journal-issue":{"issue":"7","published-online":{"date-parts":[[2025,7]]}},"alternative-id":["11197"],"URL":"https:\/\/doi.org\/10.1007\/s10462-025-11197-8","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-3374582\/v1","asserted-by":"object"}]},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,11]]},"assertion":[{"value":"7 March 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 April 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"None declared.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"198"}}