{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,9]],"date-time":"2026-07-09T06:45:49Z","timestamp":1783579549931,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681228","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"632-641","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["All rivers run into the sea: Unified Modality Brain-Inspired Emotional Central Mechanism"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4596-5391","authenticated-orcid":false,"given":"Xinji","family":"Mai","sequence":"first","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9499-9983","authenticated-orcid":false,"given":"Junxiong","family":"Lin","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8267-861X","authenticated-orcid":false,"given":"Haoran","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2998-6709","authenticated-orcid":false,"given":"Zeng","family":"Tao","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4953-2660","authenticated-orcid":false,"given":"Yan","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2959-7581","authenticated-orcid":false,"given":"Shaoqi","family":"Yan","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4379-4504","authenticated-orcid":false,"given":"Xuan","family":"Tong","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9532-6376","authenticated-orcid":false,"given":"Jiawen","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2885-2814","authenticated-orcid":false,"given":"Boyang","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8963-4821","authenticated-orcid":false,"given":"Ziheng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8319-5487","authenticated-orcid":false,"given":"Qing","family":"Zhao","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8992-0756","authenticated-orcid":false,"given":"Shuyong","family":"Gao","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3339-8751","authenticated-orcid":false,"given":"Wenqiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI and Robotics, Academy for Engineering and Technology, Fudan University &amp; Engineering Research Center of AI and Robotics, Ministry of Education, Academy for Engineering and Technology, Fudan University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2021.06.003"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41593-022-01253-9"},{"key":"e_1_3_2_1_3_1","volume-title":"Multimodal machine learning: A survey and taxonomy","author":"Baltruvsaitis Tadas","year":"2018","unstructured":"Tadas Baltruvsaitis, Chaitanya Ahuja, and Louis-Philippe Morency. 2018. Multimodal machine learning: A survey and taxonomy. IEEE transactions on pattern analysis and machine intelligence, Vol. 41, 2 (2018), 423--443."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1038\/nrn848"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2017.100"},{"key":"e_1_3_2_1_7_1","volume-title":"Nature","volume":"389","author":"Cohen Leonardo G","year":"1997","unstructured":"Leonardo G Cohen, Pablo Celnik, Alvaro Pascual-Leone, Brian Corwell, Lala Faiz, James Dambrosia, Manabu Honda, Norihiro Sadato, Christian Gerloff, M Dolores Catal\u00e1, et al. 1997. Functional relevance of cross-modal plasticity in blind humans. Nature, Vol. 389, 6647 (1997), 180--183."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","first-page":"451","DOI":"10.1002\/1531-8249(199904)45:4<451::AID-ANA6>3.0.CO;2-B","article-title":"Period of susceptibility for cross-modal plasticity in the blind","volume":"45","author":"Cohen Leonardo G","year":"1999","unstructured":"Leonardo G Cohen, Robert A Weeks, Norihiro Sadato, Pablo Celnik, Kenji Ishii, and Mark Hallett. 1999. Period of susceptibility for cross-modal plasticity in the blind. Annals of Neurology: Official Journal of the American Neurological Association and the Child Neurology Society, Vol. 45, 4 (1999), 451--460.","journal-title":"Annals of Neurology: Official Journal of the American Neurological Association and the Child Neurology Society"},{"key":"e_1_3_2_1_9_1","volume-title":"Cross-modal plasticity for the spatial processing of sounds in visually deprived subjects. Experimental brain research","author":"Collignon Olivier","year":"2009","unstructured":"Olivier Collignon, Patrice Voss, Maryse Lassonde, and Franco Lepore. 2009. Cross-modal plasticity for the spatial processing of sounds in visually deprived subjects. Experimental brain research, Vol. 192 (2009), 343--358."},{"key":"e_1_3_2_1_10_1","volume-title":"Human brain anatomy in computerized images","author":"Damasio Hanna","unstructured":"Hanna Damasio. 2005. Human brain anatomy in computerized images. Oxford university press."},{"key":"e_1_3_2_1_11_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101847"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2010.07.025"},{"key":"e_1_3_2_1_14_1","volume-title":"EmoCLIP: A Vision-Language Method for Zero-Shot Video Facial Expression Recognition. arXiv preprint arXiv:2310.16640","author":"Foteinopoulou Niki Maria","year":"2023","unstructured":"Niki Maria Foteinopoulou and Ioannis Patras. 2023. EmoCLIP: A Vision-Language Method for Zero-Shot Video Facial Expression Recognition. arXiv preprint arXiv:2310.16640 (2023)."},{"key":"e_1_3_2_1_15_1","volume-title":"The functional anatomy of time: what and when in the brain. Trends in cognitive sciences","author":"Friston Karl","year":"2016","unstructured":"Karl Friston and Gyorgy Buzs\u00e1ki. 2016. The functional anatomy of time: what and when in the brain. Trends in cognitive sciences, Vol. 20, 7 (2016), 500--511."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/S1053-8119(03)00114-9"},{"key":"e_1_3_2_1_17_1","volume-title":"Cross-modal plasticity in developmental and age-related hearing loss: Clinical implications. Hearing research","author":"Glick Hannah","year":"2017","unstructured":"Hannah Glick and Anu Sharma. 2017. Cross-modal plasticity in developmental and age-related hearing loss: Clinical implications. Hearing research, Vol. 343 (2017), 191--201."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2013.05.005"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00685"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413620"},{"key":"e_1_3_2_1_22_1","volume-title":"Emotion circuits in the brain. Annual review of neuroscience","author":"LeDoux Joseph E","year":"2000","unstructured":"Joseph E LeDoux. 2000. Emotion circuits in the brain. Annual review of neuroscience, Vol. 23, 1 (2000), 155--184."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00602"},{"key":"e_1_3_2_1_24_1","volume-title":"CLIPER: A Unified Vision-Language Framework for In-the-Wild Facial Expression Recognition. arXiv preprint arXiv:2303.00193","author":"Li Hanting","year":"2023","unstructured":"Hanting Li, Hongjing Niu, Zhaoqing Zhu, and Feng Zhao. 2023. CLIPER: A Unified Vision-Language Framework for In-the-Wild Facial Expression Recognition. arXiv preprint arXiv:2303.00193 (2023)."},{"key":"e_1_3_2_1_25_1","unstructured":"Hanting Li Mingzhe Sui Zhaoqing Zhu et al. 2022. NR-DFERNet: Noise-Robust Network for Dynamic Facial Expression Recognition. arXiv preprint arXiv:2206.04975 (2022)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3423327.3423671"},{"key":"e_1_3_2_1_27_1","unstructured":"Junxiong Lin Zeng Tao Xuan Tong Xinji Mai Haoran Wang Boyang Wang Yan Wang Qing Zhao Jiawen Yu Yuxuan Lin et al. 2024. Suppressing Uncertainties in Degradation Estimation for Blind Super-Resolution. arXiv preprint arXiv:2406.16459 (2024)."},{"key":"e_1_3_2_1_28_1","unstructured":"Junxiong Lin Yan Wang Zeng Tao Boyang Wang Qing Zhao Haorang Wang Xuan Tong Xinji Mai Yuxuan Lin Wei Song et al. 2024. Adaptive Multi-modal Fusion of Spatially Variant Kernel Refinement with Diffusion Model for Blind Image Super-Resolution. arXiv preprint arXiv:2403.05808 (2024)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548190"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1162\/jocn_a_01067"},{"key":"e_1_3_2_1_31_1","volume-title":"Cross-modal plasticity in specific auditory cortices underlies visual compensations in the deaf. Nature neuroscience","author":"Lomber Stephen G","year":"2010","unstructured":"Stephen G Lomber, M Alex Meredith, and Andrej Kral. 2010. Cross-modal plasticity in specific auditory cortices underlies visual compensations in the deaf. Nature neuroscience, Vol. 13, 11 (2010), 1421--1427."},{"key":"e_1_3_2_1_32_1","volume-title":"ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Ma Fuyan","unstructured":"Fuyan Ma, Bin Sun, and Shutao Li. 2023. Logo-Former: Local-Global Spatio-Temporal Transformer for Dynamic Facial Expression Recognition. In ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1--5."},{"key":"e_1_3_2_1_33_1","volume-title":"From Efficient Multimodal Models to World Models: A Survey. arXiv preprint arXiv:2407.00118","author":"Mai Xinji","year":"2024","unstructured":"Xinji Mai, Zeng Tao, Junxiong Lin, Haoran Wang, Yang Chang, Yanlan Kang, Yan Wang, and Wenqiang Zhang. 2024. From Efficient Multimodal Models to World Models: A Survey. arXiv preprint arXiv:2407.00118 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"OUS: Scene-Guided Dynamic Facial Expression Recognition. arXiv preprint arXiv:2405.18769","author":"Mai Xinji","year":"2024","unstructured":"Xinji Mai, Haoran Wang, Zeng Tao, Junxiong Lin, Shaoqi Yan, Yan Wang, Jing Liu, Jiawen Yu, Xuan Tong, Yating Li, et al. 2024. OUS: Scene-Guided Dynamic Facial Expression Recognition. arXiv preprint arXiv:2405.18769 (2024)."},{"key":"e_1_3_2_1_35_1","volume-title":"Nonverbal communication","author":"Mehrabian Albert","unstructured":"Albert Mehrabian. 2017. Nonverbal communication. Routledge."},{"key":"e_1_3_2_1_36_1","volume-title":"Emotion measurement","author":"Mohammad Saif M","unstructured":"Saif M Mohammad. 2016. Sentiment analysis: Detecting valence, emotions, and other affectual states from text. In Emotion measurement. Elsevier, 201--237."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the International AAAI Conference on Web and Social Media","volume":"10","author":"Pereira Mois\u00e9s","year":"2016","unstructured":"Mois\u00e9s Pereira, Fl\u00e1vio P\u00e1dua, Adriano Pereira, Fabr\u00edcio Benevenuto, and Daniel Dalip. 2016. Fusing audio, textual, and visual features for sentiment analysis of news videos. In Proceedings of the International AAAI Conference on Web and Social Media, Vol. 10. 659--662."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2015.01.095"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0219635205000951"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"key":"e_1_3_2_1_41_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1006\/nimg.2002.1111"},{"key":"e_1_3_2_1_43_1","volume-title":"Cross-modal plasticity of tactile perception in blindness. Restorative neurology and neuroscience","author":"Sathian K","year":"2010","unstructured":"K Sathian and Randall Stilla. 2010. Cross-modal plasticity of tactile perception in blindness. Restorative neurology and neuroscience, Vol. 28, 2 (2010), 271--281."},{"key":"e_1_3_2_1_44_1","volume-title":"The merging of the senses","author":"Stein Barry E","unstructured":"Barry E Stein and M Alex Meredith. 1993. The merging of the senses. MIT press."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612365"},{"key":"e_1_3_2_1_46_1","unstructured":"Zeng Tao Yan Wang Junxiong Lin Haoran Wang Xinji Mai Jiawen Yu Xuan Tong Ziheng Zhou Shaoqi Yan Qing Zhao et al. 2024. Align-DFER: Pioneering Comprehensive Dynamic Affective Alignment for Dynamic Facial Expression Recognition with CLIP. arXiv preprint arXiv:2403.04294 (2024)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01722"},{"key":"e_1_3_2_1_49_1","unstructured":"Haoran Wang Xinji Mai Zeng Tao Xuan Tong Junxiong Lin Yan Wang Jiawen Yu Boyang Wang Shaoqi Yan Qing Zhao et al. 2024. Seeking Certainty In Uncertainty: Dual-Stage Unified Framework Solving Uncertainty in Dynamic Facial Expression Recognition. arXiv preprint arXiv:2406.16473 (2024)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02025"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547865"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3057270"},{"key":"e_1_3_2_1_53_1","volume-title":"Deep learning-based multimodal emotion recognition from audio, visual, and text modalities: A systematic review of recent advancements and future prospects. Expert Systems with Applications","author":"Zhang Shiqing","year":"2023","unstructured":"Shiqing Zhang, Yijiao Yang, Chen Chen, Xingnan Zhang, Qingming Leng, and Xiaoming Zhao. 2023. Deep learning-based multimodal emotion recognition from audio, visual, and text modalities: A systematic review of recent advancements and future prospects. Expert Systems with Applications (2023), 121692."},{"key":"e_1_3_2_1_54_1","volume-title":"Prompting Visual-Language Models for Dynamic Facial Expression Recognition. arXiv preprint arXiv:2308.13382","author":"Zhao Zengqun","year":"2023","unstructured":"Zengqun Zhao and Ioannis Patras. 2023. Prompting Visual-Language Models for Dynamic Facial Expression Recognition. arXiv preprint arXiv:2308.13382 (2023)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681228","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681228","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:03Z","timestamp":1750295883000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681228"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":54,"alternative-id":["10.1145\/3664647.3681228","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681228","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}