{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T05:35:30Z","timestamp":1772602530492,"version":"3.50.1"},"reference-count":77,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s10772-025-10202-3","type":"journal-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T09:00:10Z","timestamp":1750669210000},"page":"541-563","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["A comprehensive review in affective computing: an exploration of artificial intelligence in unimodal and multimodal emotion recognition systems"],"prefix":"10.1007","volume":"28","author":[{"given":"Ajay Babasaheb","family":"Kapase\u00b7","sequence":"first","affiliation":[]},{"given":"Nilesh","family":"Uke","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,23]]},"reference":[{"key":"10202_CR1","doi-asserted-by":"publisher","first-page":"49265","DOI":"10.1109\/access.2022.3172954","volume":"10","author":"AA Abdelhamid","year":"2022","unstructured":"Abdelhamid, A. A., El-Kenawy, E. S. M., Alotaibi, B., Amer, G. M., Abdelkader, M. Y., Ibrahim, A., & Eid, M. M. (2022). 
Robust speech emotion recognition using CNN\u2009+\u2009LSTM based on stochastic fractal search optimization algorithm. IEEE Access: Practical Innovations, Open Solutions, 10, 49265\u201349284. https:\/\/doi.org\/10.1109\/access.2022.3172954","journal-title":"IEEE Access: Practical Innovations, Open Solutions"},{"key":"10202_CR2","unstructured":"Ahire, V., Shah, K., Khan, M. N., Pakhale, N., Sookha, L. R., Ganaie, M. A., & Dhall, A. (2025). MAVEN: Multi-modal Attention for Valence-Arousal Emotion Network. arXiv preprint arXiv:2503.12623."},{"key":"10202_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.jer.2024.03.021","author":"MA Almulla","year":"2024","unstructured":"Almulla, M. A. (2024). A multimodal emotion recognition system using deep Convolution neural networks. Journal of Engineering Research. https:\/\/doi.org\/10.1016\/j.jer.2024.03.021","journal-title":"Journal of Engineering Research"},{"key":"10202_CR4","doi-asserted-by":"publisher","first-page":"110494","DOI":"10.1016\/j.asoc.2023.110494","volume":"144","author":"A Aslam","year":"2023","unstructured":"Aslam, A., Sargano, A. B., & Habib, Z. (2023). Attention-based multimodal sentiment analysis and emotion recognition using deep neural networks. Applied Soft Computing, 144, 110494. https:\/\/doi.org\/10.1016\/j.asoc.2023.110494","journal-title":"Applied Soft Computing"},{"issue":"21","key":"10202_CR5","doi-asserted-by":"publisher","first-page":"9890","DOI":"10.3390\/app14219890","volume":"14","author":"D BAEK","year":"2024","unstructured":"BAEK, D., Yoon, J., & Kim, J. (2024). Data augmentation using GANs for enhanced speech emotion recognition. Applied Sciences, 14(21), 9890. https:\/\/www.mdpi.com\/2076-3417\/14\/21\/9890","journal-title":"Applied Sciences"},{"issue":"2","key":"10202_CR6","doi-asserted-by":"publisher","first-page":"993","DOI":"10.1007\/s12065-022-00772-5","volume":"17","author":"KR Bagadi","year":"2024","unstructured":"Bagadi, K. R., & Sivappagari, C. M. (2024). 
A robust feature selection method based on meta-heuristic optimization for speech emotion recognition. Evolutionary Intelligence, 17(2), 993\u20131004. https:\/\/doi.org\/10.1007\/s12065-022-00772-5","journal-title":"Evolutionary Intelligence"},{"key":"10202_CR7","doi-asserted-by":"publisher","unstructured":"Bharti, S. K., Varadhaganapathy, S., Gupta, R. K., Shukla, P. K., Bouye, M., Hingaa, S. K., & Mahmoud, A. (2022). Text-based emotion recognition using a deep learning approach. Computational Intelligence and Neuroscience, 2022, 1\u20138. https:\/\/doi.org\/10.1155\/2022\/2645381","DOI":"10.1155\/2022\/2645381"},{"issue":"2","key":"10202_CR8","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1007\/s10772-020-09713-y","volume":"23","author":"A Christy","year":"2020","unstructured":"Christy, A., Vaithyasubramanian, S., Jesudoss, A., & Praveena, M. D. (2020). Multimodal speech emotion recognition and classification using convolutional neural network techniques. International Journal of Speech Technology, 23(2), 381\u2013388. https:\/\/doi.org\/10.1007\/s10772-020-09713-y","journal-title":"International Journal of Speech Technology"},{"key":"10202_CR9","doi-asserted-by":"publisher","first-page":"168865","DOI":"10.1109\/access.2020.3023871","volume":"8","author":"Y Cimtay","year":"2020","unstructured":"Cimtay, Y., Ekmekcioglu, E., & Caglar-Ozhan, S. (2020). Cross-subject multimodal emotion recognition based on hybrid fusion. IEEE Access: Practical Innovations, Open Solutions, 8, 168865\u2013168878. https:\/\/doi.org\/10.1109\/access.2020.3023871","journal-title":"IEEE Access: Practical Innovations, Open Solutions"},{"issue":"1\u20132","key":"10202_CR10","doi-asserted-by":"publisher","first-page":"1261","DOI":"10.1007\/s11042-019-08222-8","volume":"79","author":"F Daneshfar","year":"2020","unstructured":"Daneshfar, F., & Kabudian, S. J. (2020). 
Speech emotion recognition using discriminative dimension reduction by employing a modified quantum-behaved particle swarm optimization algorithm. Multimedia Tools and Applications, 79(1\u20132), 1261\u20131289. https:\/\/doi.org\/10.1007\/s11042-019-08222-8","journal-title":"Multimedia Tools and Applications"},{"key":"10202_CR11","doi-asserted-by":"publisher","first-page":"101646","DOI":"10.1016\/j.bspc.2019.101646","volume":"55","author":"JA Dom\u00ednguez-Jim\u00e9nez","year":"2020","unstructured":"Dom\u00ednguez-Jim\u00e9nez, J. A., Campo-Landines, K. C., Mart\u00ednez-Santos, J. C., Delahoz, E. J., & Contreras-Ortiz, S. H. (2020). A machine learning model for emotion recognition from physiological signals. Biomedical Signal Processing and Control, 55, 101646. https:\/\/doi.org\/10.1016\/j.bspc.2019.101646","journal-title":"Biomedical Signal Processing and Control"},{"key":"10202_CR12","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/j.neucom.2019.10.096","volume":"380","author":"Z Gao","year":"2020","unstructured":"Gao, Z., Li, Y., Yang, Y., Wang, X., Dong, N., & Chiang, H. D. (2020). A GPSO-optimized convolutional neural network for EEG-based emotion recognition. Neurocomputing, 380, 225\u2013235. https:\/\/doi.org\/10.1016\/j.neucom.2019.10.096","journal-title":"Neurocomputing"},{"issue":"1","key":"10202_CR13","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1007\/s13246-019-00825-7","volume":"43","author":"A Goshvarpour","year":"2020","unstructured":"Goshvarpour, A., & Goshvarpour, A. (2020). The potential of photoplethysmogram and galvanic skin response in emotion recognition using nonlinear features. Physical and Engineering Sciences in Medicine, 43(1), 119\u2013134. 
https:\/\/doi.org\/10.1007\/s13246-019-00825-7","journal-title":"Physical and Engineering Sciences in Medicine"},{"key":"10202_CR14","doi-asserted-by":"publisher","first-page":"96994","DOI":"10.1109\/access.2020.2991811","volume":"8","author":"S Hamsa","year":"2020","unstructured":"Hamsa, S., Shahin, I., Iraqi, Y., & Werghi, N. (2020). Emotion recognition from speech using wavelet packet transform, cochlear filter bank, and random forest classifier. IEEE Access: Practical Innovations, Open Solutions, 8, 96994\u201397006. https:\/\/doi.org\/10.1109\/access.2020.2991811","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"key":"10202_CR15","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2023.3241234","author":"H Han","year":"2023","unstructured":"Han, H., Xu, C., & Li, S. Z. (2023). Cross-domain facial emotion generation using StarGAN. IEEE Transactions on Affective Computing. Advance online publication. https:\/\/doi.org\/10.1109\/TAFFC.2023.3241234","journal-title":"IEEE Transactions on Affective Computing Early Access"},{"key":"10202_CR16","doi-asserted-by":"publisher","first-page":"61672","DOI":"10.1109\/access.2020.2984368","volume":"8","author":"NH Ho","year":"2020","unstructured":"Ho, N. H., Yang, H. J., Kim, S. H., & Lee, G. (2020). Multimodal approach of speech emotion recognition using multi-level multi-head fusion attention-based recurrent neural network. IEEE Access: Practical Innovations, Open Solutions, 8, 61672\u201361686. https:\/\/doi.org\/10.1109\/access.2020.2984368","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"key":"10202_CR17","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.inffus.2018.09.008","volume":"49","author":"MS Hossain","year":"2019","unstructured":"Hossain, M. S., & Muhammad, G. (2019). Emotion recognition using a deep learning approach from audio\u2013visual emotional big data. Information Fusion, 49, 69\u201378. 
https:\/\/doi.org\/10.1016\/j.inffus.2018.09.008","journal-title":"Information Fusion"},{"key":"10202_CR18","doi-asserted-by":"publisher","first-page":"107887","DOI":"10.1016\/j.asoc.2021.107887","volume":"113","author":"XM Hu","year":"2021","unstructured":"Hu, X. M., Zhang, S. R., Li, M., & Deng, J. D. (2021). Multimodal particle swarm optimization for feature selection. Applied Soft Computing, 113, 107887. https:\/\/doi.org\/10.1016\/j.asoc.2021.107887","journal-title":"Applied Soft Computing"},{"issue":"21","key":"10202_CR19","doi-asserted-by":"publisher","first-page":"11255","DOI":"10.3390\/app122111255","volume":"12","author":"Z Hu","year":"2022","unstructured":"Hu, Z., Chen, L., Luo, Y., & Zhou, J. (2022). EEG-Based emotion recognition using convolutional recurrent neural network with multi-head self-attention. Applied Sciences, 12(21), 11255. https:\/\/doi.org\/10.3390\/app122111255","journal-title":"Applied Sciences"},{"key":"10202_CR20","doi-asserted-by":"publisher","first-page":"3265","DOI":"10.1109\/access.2019.2962085","volume":"8","author":"H Huang","year":"2019","unstructured":"Huang, H., Hu, Z., Wang, W., & Wu, M. (2019). Multimodal emotion recognition based on ensemble convolutional neural network. IEEE Access: Practical Innovations, Open Solutions, 8, 3265\u20133271. https:\/\/doi.org\/10.1109\/access.2019.2962085","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"issue":"23","key":"10202_CR21","doi-asserted-by":"publisher","first-page":"2891","DOI":"10.3390\/electronics10232891","volume":"10","author":"S Huang","year":"2021","unstructured":"Huang, S., Dang, H., Jiang, R., Hao, Y., Xue, C., & Gu, W. (2021). Multi-layer hybrid fuzzy classification based on SVM and improved PSO for speech emotion recognition. Electronics, 10(23), 2891. 
https:\/\/doi.org\/10.3390\/electronics10232891","journal-title":"Electronics"},{"key":"10202_CR22","doi-asserted-by":"publisher","first-page":"101894","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"Issa, D., Fatih Demirci, M., & Yazici, A. (2020). Speech emotion recognition with deep convolutional neural networks. Biomedical Signal Processing and Control, 59, 101894. https:\/\/doi.org\/10.1016\/j.bspc.2020.101894","journal-title":"Biomedical Signal Processing and Control"},{"key":"10202_CR23","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.patrec.2019.01.008","volume":"120","author":"DK Jain","year":"2019","unstructured":"Jain, D. K., Shamsolmoali, P., & Sehdev, P. (2019). Extended deep neural network for facial emotion recognition. Pattern Recognition Letters, 120, 69\u201374. https:\/\/doi.org\/10.1016\/j.patrec.2019.01.008","journal-title":"Pattern Recognition Letters"},{"key":"10202_CR24","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1016\/j.inffus.2019.06.019","volume":"53","author":"Y Jiang","year":"2020","unstructured":"Jiang, Y., Li, W., Hossain, M. S., Chen, M., Alelaiwi, A., & Al-Hammadi, M. (2020). A snapshot research and implementation of multimodal information fusion for data-driven emotion recognition. Information Fusion, 53, 209\u2013221. https:\/\/doi.org\/10.1016\/j.inffus.2019.06.019","journal-title":"Information Fusion"},{"key":"10202_CR25","doi-asserted-by":"publisher","unstructured":"Karani, R., Jani, J., & Desai, S. (2024). Fer-Bharat: A lightweight deep learning network for efficient unimodal facial emotion recognition in Indian context. Discover Artificial Intelligence, 4(1). 
https:\/\/doi.org\/10.1007\/s44163-024-00131-6","DOI":"10.1007\/s44163-024-00131-6"},{"key":"10202_CR26","doi-asserted-by":"publisher","first-page":"110525","DOI":"10.1016\/j.knosys.2023.110525","volume":"270","author":"M Khan","year":"2023","unstructured":"Khan, M., Saddik, E., Alotaibi, A., F. S., & Pham, N. T. (2023). AAD-net: Advanced end-to-end signal processing system for human emotion detection & recognition using attention-based deep echo state network. Knowledge-Based Systems, 270, 110525. https:\/\/doi.org\/10.1016\/j.knosys.2023.110525","journal-title":"Knowledge-Based Systems"},{"key":"10202_CR27","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1016\/j.neunet.2022.03.017","volume":"150","author":"P Kumar","year":"2022","unstructured":"Kumar, P., & Raman, B. (2022). A Bert based dual-channel explainable text emotion recognition system. Neural Networks, 150, 392\u2013407. https:\/\/doi.org\/10.1016\/j.neunet.2022.03.017","journal-title":"Neural Networks"},{"issue":"16","key":"10202_CR29","doi-asserted-by":"publisher","first-page":"7817","DOI":"10.1007\/s00500-022-06804-7","volume":"26","author":"N Kumari","year":"2022","unstructured":"Kumari, N., & Bhatia, R. (2022). Efficient facial emotion recognition model using deep convolutional neural network and modified joint trilateral filter. Soft Computing, 26(16), 7817\u20137830. https:\/\/doi.org\/10.1007\/s00500-022-06804-7","journal-title":"Soft Computing"},{"key":"10202_CR28","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/j.future.2021.01.014","volume":"118","author":"K Kumari","year":"2021","unstructured":"Kumari, K., Singh, J. P., Dwivedi, Y. K., & Rana, N. P. (2021). Multi-modal aggression identification using convolutional neural network and binary particle swarm optimization. Future Generation Computer Systems, 118, 187\u2013197. 
https:\/\/doi.org\/10.1016\/j.future.2021.01.014","journal-title":"Future Generation Computer Systems"},{"key":"10202_CR30","doi-asserted-by":"publisher","first-page":"114177","DOI":"10.1016\/j.eswa.2020.114177","volume":"167","author":"S Kwon","year":"2021","unstructured":"Kwon, S. (2021). MLT-DNet: Speech emotion recognition using 1D dilated CNN based on multi-learning trick approach. Expert Systems with Applications, 167, 114177. https:\/\/doi.org\/10.1016\/j.eswa.2020.114177","journal-title":"Expert Systems with Applications"},{"key":"10202_CR31","doi-asserted-by":"publisher","first-page":"14742","DOI":"10.1109\/access.2023.3244390","volume":"11","author":"HD Le","year":"2023","unstructured":"Le, H. D., Lee, G. S., Kim, S. H., Kim, S., & Yang, H. J. (2023). Multi-label multimodal emotion recognition with transformer-based fusion and emotion-level representation learning. IEEE Access: Practical Innovations, Open Solutions, 11, 14742\u201314751. https:\/\/doi.org\/10.1109\/access.2023.3244390","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"key":"10202_CR32","doi-asserted-by":"publisher","first-page":"94557","DOI":"10.1109\/access.2021.3092735","volume":"9","author":"S Lee","year":"2021","unstructured":"Lee, S., Han, D. K., & Ko, H. (2021). Multimodal emotion recognition fusion analysis adapting BERT with heterogeneous feature unification. IEEE Access: Practical Innovations, Open Solutions, 9, 94557\u201394572. https:\/\/doi.org\/10.1109\/access.2021.3092735","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"issue":"3","key":"10202_CR33","doi-asserted-by":"publisher","first-page":"102185","DOI":"10.1016\/j.ipm.2019.102185","volume":"57","author":"C Li","year":"2020","unstructured":"Li, C., Bao, Z., Li, L., & Zhao, Z. (2020). Exploring temporal representations by leveraging attention-based bidirectional LSTM-RNNS for multi-modal emotion recognition. Information Processing & Management, 57(3), 102185. 
https:\/\/doi.org\/10.1016\/j.ipm.2019.102185","journal-title":"Information Processing &amp; Management"},{"key":"10202_CR34","unstructured":"Li, J., Wang, X., Lv, G., & Zeng, Z. (2022). GA2MIF: Graph and attention based two-stage multi-source information fusion for conversational emotion detection. IEEE Transactions on affective computing, arXiv preprint arXiv:2207.11900."},{"key":"10202_CR35","doi-asserted-by":"publisher","first-page":"126427","DOI":"10.1016\/j.neucom.2023.126427","volume":"550","author":"J Li","year":"2023","unstructured":"Li, J., Wang, X., Lv, G., & Zeng, Z. (2023). GraphMFT: A graph network based multimodal fusion technique for emotion recognition in conversation. Neurocomputing, 550, 126427. https:\/\/doi.org\/10.1016\/j.neucom.2023.126427","journal-title":"Neurocomputing"},{"issue":"2","key":"10202_CR38","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1109\/tcds.2021.3071170","volume":"14","author":"W Liu","year":"2021","unstructured":"Liu, W., Qiu, J. L., Zheng, W. L., & Lu, B. L. (2021). Comparing recognition performance and robustness of multimodal deep learning models for multimodal emotion recognition. IEEE Transactions on Cognitive and Developmental Systems, 14(2), 715\u2013729. https:\/\/doi.org\/10.1109\/tcds.2021.3071170","journal-title":"IEEE Transactions on Cognitive and Developmental Systems"},{"key":"10202_CR36","doi-asserted-by":"crossref","unstructured":"Liu, P., Li, K., & Meng, H. (2022a). Group gated fusion on attention-based bidirectional alignment for multimodal emotion recognition. Published in INTERSPEECH-2020, arXiv preprint arXiv:2201.06309.","DOI":"10.21437\/Interspeech.2020-2067"},{"key":"10202_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.specom.2022.02.006","volume":"139","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Sun, H., Guan, W., Xia, Y., & Zhao, Z. (2022b). Multi-modal speech emotion recognition using self-attention mechanism and multi-scale fusion framework. 
Speech Communication, 139, 1\u20139. https:\/\/doi.org\/10.1016\/j.specom.2022.02.006","journal-title":"Speech Communication"},{"key":"10202_CR37","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1016\/j.ins.2022.11.076","volume":"619","author":"S Liu","year":"2023","unstructured":"Liu, S., Gao, P., Li, Y., Fu, W., & Ding, W. (2023). Multi-modal fusion network with complementarity and importance for emotion recognition. Information Sciences, 619, 679\u2013694. https:\/\/doi.org\/10.1016\/j.ins.2022.11.076","journal-title":"Information Sciences"},{"key":"10202_CR40","doi-asserted-by":"publisher","first-page":"108580","DOI":"10.1016\/j.knosys.2022.108580","volume":"244","author":"AI Middya","year":"2022","unstructured":"Middya, A. I., Nag, B., & Roy, S. (2022). Deep learning based multimodal emotion recognition using model-level fusion of audio\u2013visual modalities. Knowledge-Based Systems, 244, 108580. https:\/\/doi.org\/10.1016\/j.knosys.2022.108580","journal-title":"Knowledge-Based Systems"},{"issue":"9","key":"10202_CR41","doi-asserted-by":"publisher","first-page":"3046","DOI":"10.3390\/s21093046","volume":"21","author":"S Minaee","year":"2021","unstructured":"Minaee, S., Minaei, M., & Abdolrashidi, A. (2021). Deep-emotion: Facial expression recognition using attentional convolutional network. Sensors, 21(9), 3046. https:\/\/doi.org\/10.3390\/s21093046","journal-title":"Sensors (Basel, Switzerland)"},{"key":"10202_CR42","doi-asserted-by":"publisher","first-page":"104676","DOI":"10.1016\/j.imavis.2023.104676","volume":"133","author":"B Mocanu","year":"2023","unstructured":"Mocanu, B., Tapu, R., & Zaharia, T. (2023). Multimodal emotion recognition using cross modal audio-video fusion with attention and deep metric learning. Image and Vision Computing, 133, 104676. 
https:\/\/doi.org\/10.1016\/j.imavis.2023.104676","journal-title":"Image and Vision Computing"},{"key":"10202_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/tim.2020.3031835","volume":"70","author":"K Mohan","year":"2020","unstructured":"Mohan, K., Seal, A., Krejcar, O., & Yazidi, A. (2020). Facial expression recognition using local gravitational force descriptor-based deep Convolution neural networks. IEEE Transactions on Instrumentation and Measurement, 70, 1\u201312. https:\/\/doi.org\/10.1109\/tim.2020.3031835","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"issue":"1","key":"10202_CR44","doi-asserted-by":"publisher","first-page":"183","DOI":"10.3390\/s20010183","volume":"20","author":"Mustaqeem","year":"2019","unstructured":"Mustaqeem, & Kwon, S. (2019). A CNN-assisted enhanced audio signal processing for speech emotion recognition. Sensors, 20(1), 183. https:\/\/doi.org\/10.3390\/s20010183","journal-title":"Sensors (Basel, Switzerland)"},{"key":"10202_CR45","doi-asserted-by":"publisher","first-page":"172948","DOI":"10.1109\/access.2019.2955637","volume":"7","author":"S Nemati","year":"2019","unstructured":"Nemati, S., Rohani, R., Basiri, M. E., Abdar, M., Yen, N. Y., & Makarenkov, V. (2019). A hybrid latent space data fusion method for multimodal emotion recognition. IEEE Access: Practical Innovations, Open Solutions, 7, 172948\u2013172964. https:\/\/doi.org\/10.1109\/access.2019.2955637","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"issue":"3","key":"10202_CR46","doi-asserted-by":"publisher","first-page":"1903","DOI":"10.1007\/s12652-021-03407-2","volume":"14","author":"B Pan","year":"2023","unstructured":"Pan, B., Hirota, K., Jia, Z., Zhao, L., Jin, X., & Dai, Y. (2023a). Multimodal emotion recognition based on feature selection and extreme learning machine in video clips. Journal of Ambient Intelligence and Humanized Computing, 14(3), 1903\u20131917. 
https:\/\/doi.org\/10.1007\/s12652-021-03407-2","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"key":"10202_CR47","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1109\/ojemb.2023.3240280","volume":"5","author":"J Pan","year":"2023","unstructured":"Pan, J., Fang, W., Zhang, Z., Chen, B., Zhang, Z., & Wang, S. (2023b). Multimodal emotion recognition based on facial expressions, speech, and EEG. IEEE Open Journal of Engineering in Medicine and Biology, 5, 396\u2013403. https:\/\/doi.org\/10.1109\/ojemb.2023.3240280","journal-title":"IEEE Open Journal of Engineering in Medicine and Biology"},{"issue":"27","key":"10202_CR48","doi-asserted-by":"publisher","first-page":"42763","DOI":"10.1007\/s11042-023-15275-3","volume":"82","author":"SK Panda","year":"2023","unstructured":"Panda, S. K., Jena, A. K., Panda, M. R., & Panda, S. (2023). Speech emotion recognition using multimodal feature fusion with machine learning approach. Multimedia Tools and Applications, 82(27), 42763\u201342781. https:\/\/doi.org\/10.1007\/s11042-023-15275-3","journal-title":"Multimedia Tools and Applications"},{"issue":"S1","key":"10202_CR50","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/s40998-018-0142-9","volume":"43","author":"F Rahdari","year":"2019","unstructured":"Rahdari, F., Rashedi, E., & Eftekhari, M. (2019). A multimodal emotion recognition system using facial landmark analysis. Iranian Journal of Science and Technology Transactions of Electrical Engineering, 43(S1), 171\u2013189. https:\/\/doi.org\/10.1007\/s40998-018-0142-9","journal-title":"Iranian Journal of Science and Technology Transactions of Electrical Engineering"},{"issue":"3","key":"10202_CR51","doi-asserted-by":"publisher","first-page":"571","DOI":"10.1007\/s10772-021-09870-8","volume":"26","author":"S Ramesh","year":"2023","unstructured":"Ramesh, S., Gomathi, S., Sasikala, S., & Saravanan, T. R. (2023). 
Automatic speech emotion detection using hybrid of Gray Wolf optimizer and Na\u00efve Bayes. International Journal of Speech Technology, 26(3), 571\u2013578. https:\/\/doi.org\/10.1007\/s10772-021-09870-8","journal-title":"International Journal of Speech Technology"},{"key":"10202_CR52","unstructured":"Roy, S., Das, A., & Bhowmick, A. (2024). EmoDiff: A dffusion-based framework for balanced and effective emotion classification. arXiv preprint. https:\/\/arxiv.org\/abs\/2411.10863"},{"issue":"2","key":"10202_CR53","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1016\/j.eij.2020.07.005","volume":"22","author":"ES Salama","year":"2021","unstructured":"Salama, E. S., El-Khoribi, R. A., Shoman, M. E., & Wahby Shalaby, M. A. (2021). A 3D-convolutional neural network framework with ensemble learning techniques for multi-modal emotion recognition. Egyptian Informatics Journal, 22(2), 167\u2013176. https:\/\/doi.org\/10.1016\/j.eij.2020.07.005","journal-title":"Egyptian Informatics Journal"},{"key":"10202_CR54","doi-asserted-by":"publisher","first-page":"3560","DOI":"10.1016\/j.matpr.2021.07.297","volume":"80","author":"K Sarvakar","year":"2023","unstructured":"Sarvakar, K., Senkamalavalli, R., Raghavendra, S., Santosh Kumar, J., Manjunath, R., & Jaiswal, S. (2023). Facial emotion recognition using convolutional neural networks. Materials Today: Proceedings, 80, 3560\u20133564. https:\/\/doi.org\/10.1016\/j.matpr.2021.07.297","journal-title":"Materials Today: Proceedings"},{"key":"10202_CR55","doi-asserted-by":"publisher","first-page":"103970","DOI":"10.1016\/j.bspc.2022.103970","volume":"78","author":"M Sharafi","year":"2022","unstructured":"Sharafi, M., Yazdchi, M., Rasti, R., & Nasimi, F. (2022). A novel spatio-temporal convolutional neural framework for multimodal emotion recognition. Biomedical Signal Processing and Control, 78, 103970. 
https:\/\/doi.org\/10.1016\/j.bspc.2022.103970","journal-title":"Biomedical Signal Processing and Control"},{"key":"10202_CR56","doi-asserted-by":"publisher","unstructured":"Siam, A. I., Soliman, N. F., Algarni, A. D., El-Samie, A., F. E., & Sedik, A. (2022). Deploying machine learning techniques for human emotion detection. Computational Intelligence and Neuroscience, 2022, 1\u201316. https:\/\/doi.org\/10.1155\/2022\/8032673","DOI":"10.1155\/2022\/8032673"},{"key":"10202_CR57","unstructured":"Siddhad, N. M., & Sahu, A. K. (2024). EEG-Based Emotion Recognition using Diffusion Models. arXiv preprint. https:\/\/arxiv.org\/abs\/2401.16878"},{"key":"10202_CR58","doi-asserted-by":"publisher","first-page":"176274","DOI":"10.1109\/access.2020.3026823","volume":"8","author":"S Siriwardhana","year":"2020","unstructured":"Siriwardhana, S., Kaluarachchi, T., Billinghurst, M., & Nanayakkara, S. (2020). Multimodal emotion recognition with Transformer-based self supervised feature fusion. Ieee Access: Practical Innovations, Open Solutions, 8, 176274\u2013176285. https:\/\/doi.org\/10.1109\/access.2020.3026823","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"key":"10202_CR59","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/tim.2024.3420349","volume":"73","author":"P Sriram Kumar","year":"2024","unstructured":"Sriram Kumar, P., Govarthan, P. K., Shaik Gadda, A., Ganapathy, A., N., & Ronickom, F. A., J (2024). Deep learning-based automated emotion recognition using multimodal physiological signals and time-frequency methods. IEEE Transactions on Instrumentation and Measurement, 73, 1\u201312. https:\/\/doi.org\/10.1109\/tim.2024.3420349","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"10202_CR60","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.specom.2019.10.004","volume":"115","author":"L Sun","year":"2019","unstructured":"Sun, L., Zou, B., Fu, S., Chen, J., & Wang, F. (2019). 
Speech emotion recognition based on DNN-decision tree SVM model. Speech Communication, 115, 29\u201337. https:\/\/doi.org\/10.1016\/j.specom.2019.10.004","journal-title":"Speech Communication"},{"key":"10202_CR61","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/j.patrec.2023.04.019","volume":"170","author":"Y Sun","year":"2023","unstructured":"Sun, Y., Liu, J., & Fang, Y. (2023). Facial expression synthesis for emotion recognition via conditional GANs. Pattern Recognition Letters, 170, 33\u201341. https:\/\/doi.org\/10.1016\/j.patrec.2023.04.019","journal-title":"Pattern Recognition Letters"},{"issue":"18","key":"10202_CR62","doi-asserted-by":"publisher","first-page":"53497","DOI":"10.1007\/s11042-023-17653-3","volume":"83","author":"M Tahir","year":"2024","unstructured":"Tahir, M., Halim, Z., Waqas, M., Sukhia, K. N., & Tu, S. (2024). Emotion detection using convolutional neural network and long short-term memory: A deep multimodal framework. Multimedia Tools and Applications, 83(18), 53497\u201353530. https:\/\/doi.org\/10.1007\/s11042-023-17653-3","journal-title":"Multimedia Tools and Applications"},{"issue":"6","key":"10202_CR63","doi-asserted-by":"publisher","first-page":"6585","DOI":"10.1007\/s12652-021-03529-7","volume":"14","author":"P Tiwari","year":"2023","unstructured":"Tiwari, P., Rathod, H., Thakkar, S., & Darji, A. D. (2023). Multimodal emotion recognition using SDA-LDA algorithm in video clips. Journal of Ambient Intelligence and Humanized Computing, 14(6), 6585\u20136602. https:\/\/doi.org\/10.1007\/s12652-021-03529-7","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"key":"10202_CR65","doi-asserted-by":"publisher","first-page":"115831","DOI":"10.1016\/j.image.2020.115831","volume":"84","author":"X Wang","year":"2020","unstructured":"Wang, X., Chen, X., & Cao, C. (2020a). Human emotion recognition by optimally fusing facial expression and speech feature. Signal Processing: Image Communication, 84, 115831. https:\/\/doi.org\/10.1016\/j.image.2020.115831","journal-title":"Signal Processing: Image Communication"},{"issue":"4","key":"10202_CR66","doi-asserted-by":"publisher","first-page":"923","DOI":"10.1007\/s13042-019-01056-8","volume":"11","author":"Z Wang","year":"2020","unstructured":"Wang, Z., Zhou, X., Wang, W., & Liang, C. (2020b). Emotion recognition using multimodal deep learning in multiple Psychophysiological signals and video. International Journal of Machine Learning and Cybernetics, 11(4), 923\u2013934. https:\/\/doi.org\/10.1007\/s13042-019-01056-8","journal-title":"International Journal of Machine Learning and Cybernetics"},{"key":"10202_CR64","doi-asserted-by":"publisher","first-page":"33061","DOI":"10.1109\/access.2023.3263670","volume":"11","author":"S Wang","year":"2023","unstructured":"Wang, S., Qu, J., Zhang, Y., & Zhang, Y. (2023). Multimodal emotion recognition from EEG signals and facial expressions. Ieee Access: Practical Innovations, Open Solutions, 11, 33061\u201333068. https:\/\/doi.org\/10.1109\/access.2023.3263670","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"key":"10202_CR67","doi-asserted-by":"publisher","first-page":"101756","DOI":"10.1016\/j.bspc.2019.101756","volume":"58","author":"C Wei","year":"2020","unstructured":"Wei, C., Chen, L., Song, Z., Lou, X., & Li, D. (2020). EEG-based emotion recognition using simple recurrent units network and ensemble learning. Biomedical Signal Processing and Control, 58, 101756. https:\/\/doi.org\/10.1016\/j.bspc.2019.101756","journal-title":"Biomedical Signal Processing and Control"},{"key":"10202_CR68","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.inffus.2020.06.002","volume":"64","author":"D Xu","year":"2020","unstructured":"Xu, D., Tian, Z., Lai, R., Kong, X., Tan, Z., & Shi, W. (2020). Deep learning based emotion analysis of microblog texts. Information Fusion, 64, 1\u201311. https:\/\/doi.org\/10.1016\/j.inffus.2020.06.002","journal-title":"Information Fusion"},{"issue":"4","key":"10202_CR69","doi-asserted-by":"publisher","first-page":"487","DOI":"10.3390\/sym11040487","volume":"11","author":"L Yang","year":"2019","unstructured":"Yang, L., Ban, X., Mukeshimana, M., & Chen, Z. (2019). Multimodal emotion recognition using the symmetric S-elm-lupi paradigm. Symmetry, 11(4), 487. https:\/\/doi.org\/10.3390\/sym11040487","journal-title":"Symmetry"},{"key":"10202_CR70","doi-asserted-by":"publisher","first-page":"107721","DOI":"10.1016\/j.apacoust.2020.107721","volume":"173","author":"S Yildirim","year":"2021","unstructured":"Yildirim, S., Kaya, Y., & K\u0131l\u0131\u00e7, F. (2021). A modified feature selection method based on metaheuristic algorithms for speech emotion recognition. Applied Acoustics, 173, 107721. https:\/\/doi.org\/10.1016\/j.apacoust.2020.107721","journal-title":"Applied Acoustics"},{"issue":"10","key":"10202_CR71","doi-asserted-by":"publisher","first-page":"7593","DOI":"10.1007\/s00500-019-04387-4","volume":"24","author":"R Zatarain Cabada","year":"2020","unstructured":"Zatarain Cabada, R., Rodriguez Rangel, H., Barron Estrada, M. L., & Cardenas Lopez, H. M. (2020). Hyperparameter optimization in CNN for learning-centered emotion recognition for intelligent tutoring systems. Soft Computing, 24(10), 7593\u20137602. https:\/\/doi.org\/10.1007\/s00500-019-04387-4","journal-title":"Soft Computing"},{"key":"10202_CR73","doi-asserted-by":"publisher","first-page":"164130","DOI":"10.1109\/access.2020.3021994","volume":"8","author":"H Zhang","year":"2020","unstructured":"Zhang, H. (2020). Expression-EEG based collaborative multimodal emotion recognition using deep autoencoder. Ieee Access: Practical Innovations, Open Solutions, 8, 164130\u2013164143. https:\/\/doi.org\/10.1109\/access.2020.3021994","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"issue":"9","key":"10202_CR76","doi-asserted-by":"publisher","first-page":"4386","DOI":"10.1109\/tcyb.2020.2987575","volume":"51","author":"X Zhang","year":"2020","unstructured":"Zhang, X., Liu, J., Shen, J., Li, S., Hou, K., Hu, B., Gao, J., Zhang, T., & Hu, B. (2020). Emotion recognition from multimodal physiological signals using a regularized deep fusion of kernel machine. IEEE Transactions on Cybernetics, 51(9), 4386\u20134399. https:\/\/doi.org\/10.1109\/tcyb.2020.2987575","journal-title":"IEEE Transactions on Cybernetics"},{"key":"10202_CR75","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1016\/j.specom.2020.12.009","volume":"127","author":"S Zhang","year":"2021","unstructured":"Zhang, S., Tao, X., Chuang, Y., & Zhao, X. (2021a). Learning deep multimodal affective features for spontaneous speech emotion recognition. Speech Communication, 127, 73\u201381. https:\/\/doi.org\/10.1016\/j.specom.2020.12.009","journal-title":"Speech Communication"},{"key":"10202_CR77","doi-asserted-by":"publisher","first-page":"7943","DOI":"10.1109\/access.2021.3049516","volume":"9","author":"Y Zhang","year":"2021","unstructured":"Zhang, Y., Cheng, C., & Zhang, Y. (2021b). Multimodal emotion recognition using a hierarchical fusion convolutional neural network. Ieee Access: Practical Innovations, Open Solutions, 9, 7943\u20137951. https:\/\/doi.org\/10.1109\/access.2021.3049516","journal-title":"Ieee Access: Practical Innovations, Open Solutions"},{"key":"10202_CR74","doi-asserted-by":"publisher","first-page":"108078","DOI":"10.1016\/j.cie.2022.108078","volume":"168","author":"J Zhang","year":"2022","unstructured":"Zhang, J., Xing, L., Tan, Z., Wang, H., & Wang, K. (2022). Multi-head attention fusion networks for multi-modal speech emotion recognition. Computers & Industrial Engineering, 168, 108078. https:\/\/doi.org\/10.1016\/j.cie.2022.108078","journal-title":"Computers & Industrial Engineering"},{"key":"10202_CR72","doi-asserted-by":"publisher","unstructured":"Zhang, D., Chen, F., & Chen, X. (2023). DualGATs: Dual Graph Attention Networks for Emotion Recognition in Conversations. Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume1: Long Papers), 7395\u20137408. https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.408","DOI":"10.18653\/v1\/2023.acl-long.408"},{"issue":"35","key":"10202_CR78","doi-asserted-by":"publisher","first-page":"24713","DOI":"10.1007\/s00521-023-08366-7","volume":"35","author":"Y Zhao","year":"2023","unstructured":"Zhao, Y., Mamat, M., Aysa, A., & Ubul, K. (2023). Multimodal sentiment system and method based on CRNN-SVM. Neural Computing and Applications, 35(35), 24713\u201324725. https:\/\/doi.org\/10.1007\/s00521-023-08366-7","journal-title":"Neural Computing and Applications"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10202-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-025-10202-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10202-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T21:29:08Z","timestamp":1757194148000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-025-10202-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":77,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["10202"],"URL":"https:\/\/doi.org\/10.1007\/s10772-025-10202-3","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]},"assertion":[{"value":"4 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All applicable institutional and\/or national guidelines for the care and use of animals were followed.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"For this type of analysis formal consent is not needed.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}},{"value":"The authors declare that they have no potential conflict of interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}