{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T15:33:49Z","timestamp":1769182429169,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":39,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819555666","type":"print"},{"value":"9789819555673","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5567-3_29","type":"book-chapter","created":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T21:13:22Z","timestamp":1769116402000},"page":"418-433","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MOFA: Modality-Orthogonalized Fusion Architecture for\u00a0Multimodal Emotion Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-7413-3872","authenticated-orcid":false,"given":"Hongbin","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2422-1786","authenticated-orcid":false,"given":"Rui","family":"Feng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2977-8559","authenticated-orcid":false,"given":"Jie","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9702-6316","authenticated-orcid":false,"given":"Wei","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3524-8933","authenticated-orcid":false,"given":"Jianqin","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6891-6088","authenticated-orcid":false,"given":"Wentao","family":"Xiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,23]]},"reference":[{"key":"29_CR1","doi-asserted-by":"publisher","unstructured":"A.V., G., T., M., D., P., E., U.: Multimodal emotion recognition with deep learning: advancements, challenges, and future directions. Inf. Fusion 105, 102218 (2024). https:\/\/doi.org\/10.1016\/j.inffus.2023.102218","DOI":"10.1016\/j.inffus.2023.102218"},{"issue":"2","key":"29_CR2","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltrusaitis","year":"2019","unstructured":"Baltrusaitis, T., Ahuja, C., Morency, L.P.: Multimodal machine learning: a survey and taxonomy. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 423\u2013443 (2019). https:\/\/doi.org\/10.1109\/TPAMI.2018.2798607","journal-title":"IEEE Trans. Pattern Anal. Mach. 
Intell."},{"key":"29_CR3","doi-asserted-by":"crossref","unstructured":"Barezi, E.J., Momeni, P., Fung, P.: Modality-based factorization for multimodal fusion arXiv preprint arXiv:1811.1262 (2018)","DOI":"10.18653\/v1\/W19-4331"},{"key":"29_CR4","doi-asserted-by":"publisher","unstructured":"Chumachenko, K., Iosifidis, A., Gabbouj, M.: MMA-DFER: multimodal adaptation of unimodal models for dynamic facial expression recognition in-the-wild. In: CVPR Workshops, pp. 4673\u20134682 (2024). https:\/\/doi.org\/10.1109\/CVPRW63382.2024.00470","DOI":"10.1109\/CVPRW63382.2024.00470"},{"key":"29_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122579","volume":"240","author":"C Dixit","year":"2024","unstructured":"Dixit, C., Satapathy, S.M.: Deep CNN with late fusion for real time multimodal emotion recognition. Expert Syst. Appl. 240, 122579 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2023.122579","journal-title":"Expert Syst. Appl."},{"key":"29_CR6","doi-asserted-by":"publisher","unstructured":"Fan, H., et al.: Multiscale vision transformers. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6804\u20136815 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00675","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"29_CR7","unstructured":"Fu, Z., et al.: A cross-modal fusion network based on self-attention and residual structure for multimodal emotion recognition. arXiv preprint arXiv:2111.02172 (2021)"},{"key":"29_CR8","doi-asserted-by":"publisher","unstructured":"Gong, Y., Chung, Y.A., Glass, J.: AST: audio spectrogram transformer. In: Interspeech 2021 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-698","DOI":"10.21437\/Interspeech.2021-698"},{"key":"29_CR9","doi-asserted-by":"publisher","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: MISA: modality-invariant and -specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1122\u20131131 (2020). https:\/\/doi.org\/10.48550\/arXiv.2005.03545","DOI":"10.48550\/arXiv.2005.03545"},{"issue":"4","key":"29_CR10","doi-asserted-by":"publisher","first-page":"3231","DOI":"10.1109\/TAFFC.2023.3258900","volume":"14","author":"JH Hsu","year":"2023","unstructured":"Hsu, J.H., Wu, C.H.: Applying segment-level attention on bi-modal transformer encoder for audio-visual emotion recognition. IEEE Trans. Affect. Comput. 14(4), 3231\u20133243 (2023). https:\/\/doi.org\/10.1109\/TAFFC.2023.3258900","journal-title":"IEEE Trans. Affect. Comput."},{"key":"29_CR11","doi-asserted-by":"crossref","unstructured":"Hu, D., Hou, X., Wei, L., Jiang, L., Mo, Y.: MM-DFN: multimodal dynamic fusion network for emotion recognition in conversations arXiv preprint arXiv:2203.02385 (2022)","DOI":"10.1109\/ICASSP43922.2022.9747397"},{"issue":"9","key":"29_CR12","doi-asserted-by":"publisher","first-page":"9943","DOI":"10.1109\/TII.2022.3233650","volume":"19","author":"CS Jiang","year":"2023","unstructured":"Jiang, C.S., Liu, Z.T., Wu, M., She, J., Cao, W.H.: Efficient facial expression recognition with representation reinforcement network and transfer self-training for human\u2013machine interaction. IEEE Trans. Industr. Inf. 19(9), 9943\u20139952 (2023). https:\/\/doi.org\/10.1109\/TII.2022.3233650","journal-title":"IEEE Trans. Industr. 
Inf."},{"key":"29_CR13","unstructured":"Kang, Z., et al.: Hypergraph multi-modal learning for EEG-based emotion recognition in conversation arXiv preprint arXiv:2502.21154 (2025)"},{"issue":"12","key":"29_CR14","doi-asserted-by":"publisher","first-page":"2067","DOI":"10.1109\/TPAMI.2008.26","volume":"30","author":"J Kim","year":"2008","unstructured":"Kim, J., Andre, E.: Emotion recognition based on physiological changes in music listening. IEEE Trans. Pattern Anal. Mach. Intell. 30(12), 2067\u20132083 (2008). https:\/\/doi.org\/10.1109\/TPAMI.2008.26","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"29_CR15","doi-asserted-by":"publisher","unstructured":"Lee, M.H., et al.: EAV: EEG-audio-video dataset for emotion recognition in conversational contexts. Sci. Data 11(1) (2024). https:\/\/doi.org\/10.1038\/s41597-024-03838-4","DOI":"10.1038\/s41597-024-03838-4"},{"issue":"3","key":"29_CR16","doi-asserted-by":"publisher","first-page":"1195","DOI":"10.1109\/TAFFC.2020.2981446","volume":"13","author":"S Li","year":"2022","unstructured":"Li, S., Deng, W.: Deep facial expression recognition: a survey. IEEE Trans. Affect. Comput. 13(3), 1195\u20131215 (2022). https:\/\/doi.org\/10.1109\/TAFFC.2020.2981446","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"9","key":"29_CR17","doi-asserted-by":"publisher","first-page":"10703","DOI":"10.1109\/TPAMI.2023.3257846","volume":"45","author":"D Liu","year":"2023","unstructured":"Liu, D., et al.: Brain-machine coupled learning method for facial emotion recognition. IEEE Trans. Pattern Anal. Mach. Intell. 45(9), 10703\u201310717 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2023.3257846","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"29_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2024.3369130","volume":"73","author":"H Liu","year":"2024","unstructured":"Liu, H., et al.: EEG-based multimodal emotion recognition: A machine learning perspective. IEEE Trans. Instrum. Meas. 73, 1\u201329 (2024). https:\/\/doi.org\/10.1109\/TIM.2024.3369130","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"29_CR19","doi-asserted-by":"publisher","unstructured":"Liu, Z., et al.: Efficient low-rank multimodal fusion with modality-specific factors. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1209","DOI":"10.18653\/v1\/P18-1209"},{"key":"29_CR20","doi-asserted-by":"publisher","unstructured":"Lv, F., Chen, X., Huang, Y., Duan, L., Lin, G.: Progressive modality reinforcement for human multimodal emotion recognition from unaligned multimodal sequences. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2554\u20132562 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00258","DOI":"10.1109\/CVPR46437.2021.00258"},{"key":"29_CR21","doi-asserted-by":"publisher","first-page":"4121","DOI":"10.1109\/TMM.2022.3171679","volume":"25","author":"S Mai","year":"2023","unstructured":"Mai, S., Zeng, Y., Hu, H.: Multimodal information bottleneck: learning minimal sufficient unimodal and multimodal representations. IEEE Trans. Multimedia 25, 4121\u20134134 (2023). https:\/\/doi.org\/10.1109\/TMM.2022.3171679","journal-title":"IEEE Trans. 
Multimedia"},{"key":"29_CR22","doi-asserted-by":"crossref","unstructured":"McInnes, L., Healy, J., Melville, J.: UMAP: uniform manifold approximation and projection for dimension reduction arXiv preprint arXiv:1802.03426 (2018)","DOI":"10.21105\/joss.00861"},{"key":"29_CR23","doi-asserted-by":"publisher","unstructured":"Mo, S., Morgado, P.: Unveiling the power of audio-visual early fusion transformers with dense interactions through masked modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 27186\u201327196 (2024). https:\/\/doi.org\/10.1109\/CVPR52733.2024.02567","DOI":"10.1109\/CVPR52733.2024.02567"},{"key":"29_CR24","doi-asserted-by":"publisher","unstructured":"Pillalamarri, R., Shanmugam, U.: A review on EEG-based multimodal learning for emotion recognition. Artif. Intell. Rev. 58(5) (2025). https:\/\/doi.org\/10.1007\/s10462-025-11126-9","DOI":"10.1007\/s10462-025-11126-9"},{"issue":"3","key":"29_CR25","doi-asserted-by":"publisher","first-page":"1876","DOI":"10.1109\/TAFFC.2022.3176135","volume":"14","author":"S Saganowski","year":"2023","unstructured":"Saganowski, S., Perz, B., Polak, A.G., Kazienko, P.: Emotion recognition for everyday life using physiological signals from wearables: a systematic literature review. IEEE Trans. Affect. Comput. 14(3), 1876\u20131897 (2023). https:\/\/doi.org\/10.1109\/TAFFC.2022.3176135","journal-title":"IEEE Trans. Affect. Comput."},{"key":"29_CR26","doi-asserted-by":"publisher","first-page":"710","DOI":"10.1109\/TNSRE.2022.3230250","volume":"31","author":"Y Song","year":"2023","unstructured":"Song, Y., Zheng, Q., Liu, B., Gao, X.: EEG Conformer: convolutional transformer for EEG decoding and visualization. IEEE Trans. Neural Syst. Rehabil. Eng. 31, 710\u2013719 (2023). https:\/\/doi.org\/10.1109\/TNSRE.2022.3230250","journal-title":"IEEE Trans. Neural Syst. Rehabil. Eng."},{"issue":"1","key":"29_CR27","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1109\/TAFFC.2023.3274829","volume":"15","author":"L Sun","year":"2024","unstructured":"Sun, L., Lian, Z., Liu, B., Tao, J.: Efficient multimodal transformer with dual-level feature restoration for robust multimodal sentiment analysis. IEEE Trans. Affect. Comput. 15(1), 309\u2013325 (2024). https:\/\/doi.org\/10.1109\/TAFFC.2023.3274829","journal-title":"IEEE Trans. Affect. Comput."},{"key":"29_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102382","volume":"108","author":"L Sun","year":"2024","unstructured":"Sun, L., Lian, Z., Liu, B., Tao, J.: HiCMAE: hierarchical contrastive masked autoencoder for self-supervised audio-visual emotion recognition. Information Fusion 108, 102382 (2024). https:\/\/doi.org\/10.1016\/j.inffus.2024.102382","journal-title":"Information Fusion"},{"key":"29_CR29","doi-asserted-by":"publisher","unstructured":"Tsai, Y.H.H., et al.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1656","DOI":"10.18653\/v1\/P19-1656"},{"key":"29_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2022.105907","volume":"149","author":"Q Wang","year":"2022","unstructured":"Wang, Q., Wang, M., Yang, Y., Zhang, X.: Multi-modal emotion recognition using eeg and speech signals. Comput. Biol. Med. 149, 105907 (2022). https:\/\/doi.org\/10.1016\/j.compbiomed.2022.105907","journal-title":"Comput. Biol. 
Med."},{"key":"29_CR31","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1016\/j.inffus.2022.03.009","volume":"83\u201384","author":"Y Wang","year":"2022","unstructured":"Wang, Y., et al.: A systematic review on affective computing: emotion models, databases, and recent advances. Inf. Fusion 83\u201384, 19\u201352 (2022). https:\/\/doi.org\/10.1016\/j.inffus.2022.03.009","journal-title":"Inf. Fusion"},{"key":"29_CR32","doi-asserted-by":"publisher","unstructured":"Xiang, P., Lin, C., Wu, K., Bai, O.: Multimae-der: multimodal masked autoencoder for dynamic emotion recognition. In: 2024 14th International Conference on Pattern Recognition Systems (ICPRS), pp. 1\u20137 (2024). https:\/\/doi.org\/10.1109\/ICPRS62101.2024.10677820","DOI":"10.1109\/ICPRS62101.2024.10677820"},{"key":"29_CR33","doi-asserted-by":"publisher","unstructured":"Yadegaridehkordi, E., Noor, N.F.B.M., Ayub, M.N.B., Affal, H.B., Hussin, N.B.: Affective computing in education: a systematic review and future research. Computersand amp; Education 142, 103649 (2019). https:\/\/doi.org\/10.1016\/j.compedu.2019.103649","DOI":"10.1016\/j.compedu.2019.103649"},{"key":"29_CR34","doi-asserted-by":"publisher","unstructured":"Yang, D., Huang, S., Kuang, H., Du, Y., Zhang, L.: Disentangled representation learning for multimodal emotion recognition. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 1642\u20131651 (2022). https:\/\/doi.org\/10.1145\/3503161.3547754","DOI":"10.1145\/3503161.3547754"},{"issue":"3","key":"29_CR35","doi-asserted-by":"publisher","first-page":"1725","DOI":"10.1109\/TAFFC.2024.3370103","volume":"15","author":"J Ye","year":"2024","unstructured":"Ye, J., Yu, Y., Zheng, Y., Liu, Y., Wang, Q.: Dep-FER: facial expression recognition in depressed patients based on voluntary facial expression mimicry. IEEE Trans. Affect. Comput. 15(3), 1725\u20131738 (2024). https:\/\/doi.org\/10.1109\/TAFFC.2024.3370103","journal-title":"IEEE Trans. Affect. Comput."},{"key":"29_CR36","doi-asserted-by":"publisher","unstructured":"Yin, K., Shin, H.B., Li, D., Lee, S.W.: EEG-based multimodal representation learning for emotion recognition. In: 2025 13th International Conference on Brain-Computer Interface (BCI), pp. 1\u20134 (2025). https:\/\/doi.org\/10.1109\/BCI65088.2025.10931743","DOI":"10.1109\/BCI65088.2025.10931743"},{"key":"29_CR37","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., Morency, L.P.: Tensor fusion network for multimodal sentiment analysis. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing (2017). https:\/\/doi.org\/10.18653\/v1\/D17-1115","DOI":"10.18653\/v1\/D17-1115"},{"key":"29_CR38","doi-asserted-by":"publisher","unstructured":"Zhang, H., Wang, Y., Yin, G., Liu, K., Liu, Y., Yu, T.: Learning language-guided adaptive hyper-modality representation for multimodal sentiment analysis. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.49","DOI":"10.18653\/v1\/2023.emnlp-main.49"},{"key":"29_CR39","doi-asserted-by":"publisher","unstructured":"Zhang, S., et al.: Deep learning-based multimodal emotion recognition from audio, visual, and text modalities: A systematic review of recent advancements and future prospects. Expert Syst. Appl. 237, 121692 (2024). 
https:\/\/doi.org\/10.1016\/j.eswa.2023.121692","DOI":"10.1016\/j.eswa.2023.121692"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5567-3_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T21:13:24Z","timestamp":1769116404000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5567-3_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819555666","9789819555673"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5567-3_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"23 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
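The record above is a standard Crossref REST API work object: the payload sits under "message", and the field names used below (DOI, title, author, page, reference, references-count) all appear in the record itself. A minimal parsing sketch in Python follows, assuming the JSON has been saved to a local file named crossref_work.json (a hypothetical filename chosen for illustration):

import json

# Load the Crossref work record shown above (assumed saved as crossref_work.json).
with open("crossref_work.json", encoding="utf-8") as f:
    record = json.load(f)

work = record["message"]  # payload lives under "message" when "status" is "ok"

doi = work["DOI"]                      # "10.1007/978-981-95-5567-3_29"
title = work["title"][0]               # "title" is a list; this chapter has one entry
pages = work.get("page")               # "418-433"
container = work["container-title"]    # series and volume titles

# Authors are objects with "given"/"family" (and optionally an "ORCID").
authors = ["{} {}".format(a["given"], a["family"]) for a in work.get("author", [])]

# Each "reference" entry carries a "key" and usually a "DOI" and/or "unstructured" string.
ref_dois = [r["DOI"] for r in work.get("reference", []) if "DOI" in r]

print(doi, "|", title)
print("authors:", ", ".join(authors))
print("references with DOIs:", len(ref_dois), "of", work["references-count"])

Note that fields such as "author", "reference", and "page" are optional in Crossref records generally, hence the defensive .get() calls; only the fields present in this particular record are accessed directly.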