{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T15:44:00Z","timestamp":1771947840543,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819665846","type":"print"},{"value":"9789819665853","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-6585-3_1","type":"book-chapter","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:41:18Z","timestamp":1750689678000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Multimodal Emotion Recognition by\u00a0Fusing Video Semantics in\u00a0Video Learning Scenarios"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-5413-487X","authenticated-orcid":false,"given":"Yuan","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7850-9518","authenticated-orcid":false,"given":"Xiaomei","family":"Tao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3764-2765","authenticated-orcid":false,"given":"Hanxu","family":"Ai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5025-5472","authenticated-orcid":false,"given":"Tao","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2439-4927","authenticated-orcid":false,"given":"Yanling","family":"Gan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,24]]},"reference":[{"key":"1_CR1","doi-asserted-by":"publisher","unstructured":"Alhagry, S., Fahmy, A.A., El-Khoribi, R.A.: Emotion recognition based on EEG using LSTM recurrent neural network. Int. J. Adv. Comput. Sci. Appl. 8(10) (2017). https:\/\/doi.org\/10.14569\/IJACSA.2017.081046","DOI":"10.14569\/IJACSA.2017.081046"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Azhar, K.A., Iqbal, N., Shah, Z., Ahmed, H.: Understanding high dropout rates in MOOCs \u2013 a qualitative case study from Pakistan. Innov. Educ. Teach. Int. (2023)","DOI":"10.1080\/14703297.2023.2200753"},{"issue":"1","key":"1_CR3","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1109\/TCSS.2022.3221128","volume":"11","author":"J Bao","year":"2024","unstructured":"Bao, J., Tao, X., Zhou, Y.: An emotion recognition method based on eye movement and audiovisual features in MOOC learning environment. IEEE Trans. Comput. Soc. Syst. 11(1), 171\u2013183 (2024). https:\/\/doi.org\/10.1109\/TCSS.2022.3221128","journal-title":"IEEE Trans. Comput. Soc. Syst."},{"issue":"1","key":"1_CR4","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/TIT.1967.1053964","volume":"13","author":"T Cover","year":"1967","unstructured":"Cover, T., Hart, P.: Nearest neighbor pattern classification. IEEE Trans. Inf. Theory 13(1), 21\u201327 (1967). https:\/\/doi.org\/10.1109\/TIT.1967.1053964","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1_CR5","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1016\/j.neucom.2021.02.020","volume":"457","author":"K Dashtipour","year":"2021","unstructured":"Dashtipour, K., Gogate, M., Cambria, E., Hussain, A.: A novel context-aware multimodal framework for Persian sentiment analysis. Neurocomputing 457, 377\u2013388 (2021). https:\/\/doi.org\/10.1016\/j.neucom.2021.02.020","journal-title":"Neurocomputing"},{"key":"1_CR6","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"1_CR7","series-title":"Studies in Computational Intelligence","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/978-3-319-25017-5_35","volume-title":"Intelligent Distributed Computing IX","author":"AR Faria","year":"2016","unstructured":"Faria, A.R., Almeida, A., Martins, C., Gon\u00e7alves, R.: Emotion effects on online learning. In: Novais, P., Camacho, D., Analide, C., El Fallah Seghrouchni, A., Badica, C. (eds.) Intelligent Distributed Computing IX. SCI, vol. 616, pp. 375\u2013385. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-25017-5_35"},{"key":"1_CR8","doi-asserted-by":"publisher","unstructured":"Gong, L., Chen, W., Li, M., Zhang, T.: Emotion recognition from multiple physiological signals using intra- and inter-modality attention fusion network. Digit. Sig. Process. 144(C) (2024). https:\/\/doi.org\/10.1016\/j.dsp.2023.104278","DOI":"10.1016\/j.dsp.2023.104278"},{"key":"1_CR9","doi-asserted-by":"publisher","unstructured":"He, H., Bai, Y., Garcia, E.A., Li, S.: ADASYN: adaptive synthetic sampling approach for imbalanced learning. In: 2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence), pp. 1322\u20131328 (2008). https:\/\/doi.org\/10.1109\/IJCNN.2008.4633969","DOI":"10.1109\/IJCNN.2008.4633969"},{"key":"1_CR10","doi-asserted-by":"publisher","unstructured":"Jeevan, R.K., Sp, V.M.R., Shiva\u00a0Kumar, P., Srivikas, M.: EEG-based emotion recognition using LSTM-RNN machine learning algorithm. In: 2019 1st International Conference on Innovations in Information and Communication Technology (ICIICT), pp.\u00a01\u20134 (2019). https:\/\/doi.org\/10.1109\/ICIICT1.2019.8741506","DOI":"10.1109\/ICIICT1.2019.8741506"},{"issue":"3","key":"1_CR11","doi-asserted-by":"publisher","first-page":"1082","DOI":"10.1007\/s12559-023-10119-6","volume":"15","author":"D Jiang","year":"2023","unstructured":"Jiang, D., Liu, H., Wei, R., Tu, G.: CSAT-FTCN: a fuzzy-oriented model with contextual self-attention network for multimodal emotion recognition. Cogn. Comput. 15(3), 1082\u20131091 (2023). https:\/\/doi.org\/10.1007\/s12559-023-10119-6","journal-title":"Cogn. Comput."},{"issue":"2","key":"1_CR12","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/j.bbr.2011.04.025","volume":"223","author":"MJ Kim","year":"2011","unstructured":"Kim, M.J., et al.: The structural and functional connectivity of the amygdala: from normal emotion to pathological anxiety. Behav. Brain Res. 223(2), 403\u2013410 (2011). https:\/\/doi.org\/10.1016\/j.bbr.2011.04.025","journal-title":"Behav. Brain Res."},{"issue":"11","key":"1_CR13","doi-asserted-by":"publisher","first-page":"2755","DOI":"10.1109\/TPAMI.2019.2916866","volume":"42","author":"R Kosti","year":"2020","unstructured":"Kosti, R., Alvarez, J.M., Recasens, A., Lapedriza, A.: Context based emotion recognition using EMOTIC dataset. IEEE Trans. Pattern Anal. Mach. Intell. 42(11), 2755\u20132766 (2020). https:\/\/doi.org\/10.1109\/TPAMI.2019.2916866","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"86","key":"1_CR14","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"van der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(86), 2579\u20132605 (2008)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR15","doi-asserted-by":"publisher","unstructured":"Meei\u00a0Tyng, C., Amin, H.U., Mohamad\u00a0Saad, M.N., Malik, A.: The influences of emotion on learning and memory. Front. Psychol. 8, 1454 (2017). https:\/\/doi.org\/10.3389\/fpsyg.2017.01454","DOI":"10.3389\/fpsyg.2017.01454"},{"issue":"1","key":"1_CR16","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/T-AFFC.2011.25","volume":"3","author":"M Soleymani","year":"2012","unstructured":"Soleymani, M., Lichtenauer, J., Pun, T., Pantic, M.: A multimodal database for affect recognition and implicit tagging. IEEE Trans. Affect. Comput. 3(1), 42\u201355 (2012). https:\/\/doi.org\/10.1109\/T-AFFC.2011.25","journal-title":"IEEE Trans. Affect. Comput."},{"key":"1_CR17","doi-asserted-by":"publisher","unstructured":"Sun, H.L., et al.: The influence of teacher\u2013student interaction on the effects of online learning: based on a serial mediating model. Front. Psychol. 13 (2022). https:\/\/doi.org\/10.3389\/fpsyg.2022.779217","DOI":"10.3389\/fpsyg.2022.779217"},{"issue":"18","key":"1_CR18","doi-asserted-by":"publisher","first-page":"5328","DOI":"10.3390\/s20185328","volume":"20","author":"C Tan","year":"2020","unstructured":"Tan, C., Ceballos, G., Kasabov, N., Puthanmadam Subramaniyam, N.: FusionSense: emotion classification using feature fusion of multimodal data and deep learning in a brain-inspired spiking neural network. Sensors (Basel, Switzerland) 20(18), 5328 (2020). https:\/\/doi.org\/10.3390\/s20185328","journal-title":"Sensors (Basel, Switzerland)"},{"key":"1_CR19","doi-asserted-by":"publisher","unstructured":"Tao, X., Zhang, Y.: A multimodal intelligent emotion perception framework by data-driven and knowledge-guided. In: 2022 2nd International Conference on Electronic Information Engineering and Computer Technology (EIECT), pp. 70\u201373 (2022). https:\/\/doi.org\/10.1109\/EIECT58010.2022.00019","DOI":"10.1109\/EIECT58010.2022.00019"},{"key":"1_CR20","doi-asserted-by":"publisher","unstructured":"Tzirakis, P., Nguyen, A., Zafeiriou, S., Schuller, B.W.: Speech emotion recognition using semantic information. In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6279\u20136283 (2021). https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9414866","DOI":"10.1109\/ICASSP39728.2021.9414866"},{"key":"1_CR21","unstructured":"Vaswani, A., et al.: Attention is all You need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, NIPS 2017, pp. 6000\u20136010. Curran Associates Inc., Red Hook, NY, USA (2017)"},{"issue":"12","key":"1_CR22","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1016\/j.tics.2005.10.011","volume":"9","author":"P Vuilleumier","year":"2005","unstructured":"Vuilleumier, P.: How brains beware: neural mechanisms of emotional attention. Trends Cogn. Sci. 9(12), 585\u2013594 (2005). https:\/\/doi.org\/10.1016\/j.tics.2005.10.011","journal-title":"Trends Cogn. Sci."},{"key":"1_CR23","doi-asserted-by":"publisher","unstructured":"Wang, Y., Jiang, W.B., Li, R., Lu, B.L.: Emotion transformer fusion: complementary representation properties of EEG and eye movements on recognizing anger and surprise. In: 2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 1575\u20131578 (2021). https:\/\/doi.org\/10.1109\/BIBM52615.2021.9669556","DOI":"10.1109\/BIBM52615.2021.9669556"},{"key":"1_CR24","doi-asserted-by":"publisher","unstructured":"Wang, Y., Guan, X.: Multimodal feature fusion and emotion recognition based on variational autoencoder. In: 2023 IEEE 5th International Conference on Civil Aviation Safety and Information Technology (ICCASIT), pp. 819\u2013823 (2023). https:\/\/doi.org\/10.1109\/ICCASIT58768.2023.10351725","DOI":"10.1109\/ICCASIT58768.2023.10351725"},{"key":"1_CR25","doi-asserted-by":"publisher","unstructured":"Xia, X., Zhao, Y., Jiang, D.: Multimodal interaction enhanced representation learning for video emotion recognition. Front. Neurosci. 16 (2022). https:\/\/doi.org\/10.3389\/fnins.2022.1086380","DOI":"10.3389\/fnins.2022.1086380"},{"key":"1_CR26","doi-asserted-by":"publisher","unstructured":"Pan, X., Hu, B., Zhou, Z., Feng, X.: Are students happier the more they learn? \u2013 Research on the influence of course progress on academic emotion in online learning. Interact. Learn. Environ. 31(10), 6869\u20136889 (2023). https:\/\/doi.org\/10.1080\/10494820.2022.2052110","DOI":"10.1080\/10494820.2022.2052110"},{"key":"1_CR27","doi-asserted-by":"publisher","unstructured":"Yang, M., Wu, Y., Tao, Y., Hu, X., Hu, B.: Trial selection tensor canonical correlation analysis (TSTCCA) for depression recognition with facial expression and pupil diameter. IEEE J. Biomed. Health Inform. PP, 1\u201312 (10 2023). https:\/\/doi.org\/10.1109\/JBHI.2023.3322271","DOI":"10.1109\/JBHI.2023.3322271"},{"key":"1_CR28","doi-asserted-by":"publisher","unstructured":"Ye, H., Zhou, Y., Tao, X.: A method of multimodal emotion recognition in video learning based on knowledge enhancement. Comput. Syst. Sci. Eng. 47(2), 1709\u20131732 (2023). https:\/\/doi.org\/10.32604\/csse.2023.039186","DOI":"10.32604\/csse.2023.039186"},{"key":"1_CR29","doi-asserted-by":"publisher","unstructured":"Ye, Q., et al.: mPLUG-Owl: modularization empowers large language models with multimodality. arXiv (2023). https:\/\/doi.org\/10.48550\/arXiv.2304.14178","DOI":"10.48550\/arXiv.2304.14178"},{"key":"1_CR30","doi-asserted-by":"publisher","first-page":"2213","DOI":"10.1109\/TMM.2022.3144885","volume":"25","author":"J Zheng","year":"2023","unstructured":"Zheng, J., Zhang, S., Wang, Z., Wang, X., Zeng, Z.: Multi-channel weight-sharing autoencoder based on cascade multi-head attention for multimodal emotion recognition. IEEE Trans. Multimedia 25, 2213\u20132225 (2023). https:\/\/doi.org\/10.1109\/TMM.2022.3144885","journal-title":"IEEE Trans. Multimedia"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-6585-3_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:41:21Z","timestamp":1750689681000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-6585-3_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819665846","9789819665853"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-6585-3_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Auckland","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}