{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:14:31Z","timestamp":1765008871479,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["72501151"],"award-info":[{"award-number":["72501151"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Program of Innovation Improvement for Small and Medium-sized Enterprises of Shandong","award":["2024TSGC0062, 2024TSGC0094"],"award-info":[{"award-number":["2024TSGC0062, 2024TSGC0094"]}]},{"name":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education","award":["2023ZD027, 2024ZD017"],"award-info":[{"award-number":["2023ZD027, 2024ZD017"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1145\/3743093.3771041","type":"proceedings-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:06:16Z","timestamp":1765008376000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["From Feature Alignment to Multimodal Fusion: A Two-Stage Primary Modality-Guided Approach for MSA"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0526-3064","authenticated-orcid":false,"given":"Guoyu","family":"Ma","sequence":"first","affiliation":[{"name":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences)), Jinan, China and Shandong Provincial Key Laboratory of Computing Power Internet and Service Computing, Shandong Fundamental Research Center for Computer Science, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7485-2298","authenticated-orcid":false,"given":"Xiaoqiang","family":"Ren","sequence":"additional","affiliation":[{"name":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences)), Jinan, China and Shandong Provincial Key Laboratory of Computing Power Internet and Service Computing, Shandong Fundamental Research Center for Computer Science, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5318-4981","authenticated-orcid":false,"given":"Yan","family":"Jiang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences)), Jinan, China and Shandong Provincial Key Laboratory of Computing Power Internet and Service Computing, Shandong Fundamental Research Center for Computer Science, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4372-7416","authenticated-orcid":false,"given":"Hongjiao","family":"Guan","sequence":"additional","affiliation":[{"name":"Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center (National Supercomputer Center in Jinan), Qilu University of Technology (Shandong Academy of Sciences)), Jinan, China and Shandong Provincial Key Laboratory of Computing Power Internet and Service Computing, Shandong Fundamental Research Center for Computer Science, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9125-8669","authenticated-orcid":false,"given":"Bing","family":"Xu","sequence":"additional","affiliation":[{"name":"Faculty of Computing, Harbin Institute of Technology, Harbin, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2016.7477553"},{"key":"e_1_3_3_1_3_2","first-page":"82","volume-title":"Proceedings of the 3th Conference of Association for the Advancement of Artificial Intelligence(AAAI)","volume":"2614","author":"Chen Feiyang","year":"2020","unstructured":"Feiyang Chen, Ziqian Luo, Yanyan Xu, and Dengfeng Ke. 2020. Complementary fusion of multi-features and multi-modalities in sentiment analysis. In Proceedings of the 3th Conference of Association for the Advancement of Artificial Intelligence(AAAI) , Vol.\u00a02614. CEUR-WS.org, New York, USA, 82\u201399."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","unstructured":"Ringki Das and Thoudam\u00a0Doren Singh. 2023. Multimodal sentiment analysis: A survey of methods trends and challenges. ACM Comput. Surv. 55 13s (2023) 270:1\u2013270:38. 10.1145\/3586075","DOI":"10.1145\/3586075"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853739"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/N19-1423"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/D16-1044"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/D18-1382"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240714"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681253"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.404"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","unstructured":"Ramandeep Kaur and Sandeep Kautish. 2019. Multimodal sentiment analysis: A survey and comparison. Int. J. Serv. Sci. Manag. Eng. Technol. 10 2 (2019) 38\u201358. 10.4018\/IJSSMET.2019040103","DOI":"10.4018\/IJSSMET.2019040103"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","unstructured":"Songning Lai Xifeng Hu Haoxuan Xu Zhaoxia Ren and Zhi Liu. 2023. Multimodal sentiment analysis: A survey. Displays 80 (2023) 102563. 10.1016\/J.DISPLA.2023.102563","DOI":"10.1016\/J.DISPLA.2023.102563"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-9119-8_18"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445820"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00641"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","unstructured":"Yong Li Jiabei Zeng and Shiguang Shan. 2022. Learning representations for facial actions from unlabeled videos. IEEE Transactions on Knowledge and Data Engineering 44 1 (2022) 302\u2013317. 10.1109\/TPAMI.2020.3011063","DOI":"10.1109\/TPAMI.2020.3011063"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01118"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3696410.3714533"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2022.FINDINGS-NAACL.175"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/P18-1209"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00258"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/P19-1046"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","unstructured":"Navonil Majumder Devamanyu Hazarika Alexander\u00a0F. Gelbukh Erik Cambria and Soujanya Poria. 2018. Multimodal sentiment analysis using hierarchical fusion with context modeling. Knowl. Based Syst. 161 (2018) 124\u2013133. 10.1016\/J.KNOSYS.2018.07.041","DOI":"10.1016\/J.KNOSYS.2018.07.041"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2022.ACL-DEMO.20"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.3115\/V1\/D14-1162"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016892"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2017.134"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/P17-1081"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2020.ACL-MAIN.214"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","unstructured":"Mohammad Soleymani David Garcia Brendan Jou Bj\u00f6rn Schuller Shih-Fu Chang and Maja Pantic. 2017. A survey of multimodal sentiment analysis. Image and Vision Computing 65 (2017) 3\u201314. 10.1016\/J.IMAVIS.2017.08.003","DOI":"10.1016\/J.IMAVIS.2017.08.003"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","unstructured":"Xuemeng Song Chun Wang Changchang Sun Shanshan Feng Min Zhou and Liqiang Nie. 2023. MM-FRec: Multi-modal enhanced fashion item recommendation. IEEE Transactions on Knowledge and Data Engineering 35 10 (2023) 10072\u201310084. 10.1109\/TKDE.2023.3266423","DOI":"10.1109\/TKDE.2023.3266423"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/P19-1656"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i20.35416"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2023.ACL-LONG.287"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3309"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3302"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547754"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/D17-1115"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V32I1.12021"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/P18-1208"},{"key":"e_1_3_3_1_43_2","unstructured":"Amir Zadeh Rowan Zellers Eli Pincus and Louis-Philippe Morency. 2016. MOSI: Multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1606.06259 (2016)."}],"event":{"name":"MMAsia '25: ACM Multimedia Asia","location":"Kuala Lumpur Malaysia","acronym":"MMAsia '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 7th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3743093.3771041","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:10:54Z","timestamp":1765008654000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3743093.3771041"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":42,"alternative-id":["10.1145\/3743093.3771041","10.1145\/3743093"],"URL":"https:\/\/doi.org\/10.1145\/3743093.3771041","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}