{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T04:42:24Z","timestamp":1781584944656,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62176083,62176084"],"award-info":[{"award-number":["62176083,62176084"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFC3604704"],"award-info":[{"award-number":["2023YFC3604704"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681163","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"5732-5741","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["KEBR: Knowledge Enhanced Self-Supervised Balanced Representation for Multimodal Sentiment Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2452-6862","authenticated-orcid":false,"given":"Aoqiang","family":"Zhu","sequence":"first","affiliation":[{"name":"Hefei University of Technology, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2122-0240","authenticated-orcid":false,"given":"Min","family":"Hu","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, Anhui, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1751-2291","authenticated-orcid":false,"given":"Xiaohua","family":"Wang","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, Anhui, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0233-590X","authenticated-orcid":false,"given":"Jiaoyun","family":"Yang","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, Anhui, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0917-2277","authenticated-orcid":false,"given":"Yiming","family":"Tang","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, Anhui, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4860-9184","authenticated-orcid":false,"given":"Fuji","family":"Ren","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_2_1","volume-title":"Multimodal machine learning: A survey and taxonomy","author":"Baltrusaitis Tadas","year":"2018","unstructured":"Tadas Baltrusaitis, Chaitanya Ahuja, and Louis-Philippe Morency. 2018. Multimodal machine learning: A survey and taxonomy. IEEE transactions on pattern analysis and machine intelligence 41, 2 (2018), 423--443. https:\/\/doi.org\/10.1109\/ TPAMI.2018.2798607"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00019"},{"key":"e_1_3_2_1_4_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901. https:\/\/doi.org\/10.18653\/ V1\/2021.EMNLP-MAIN.723"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.1078"},{"key":"e_1_3_2_1_6_1","first-page":"1086","article-title":"VoxCeleb2","volume":"2018","author":"Chung Joon Son","year":"2018","unstructured":"Joon Son Chung, Arsha Nagrani, and Andrew Zisserman. 2018. VoxCeleb2: Deep Speaker Recognition. In Proc. Interspeech 2018. 1086--1090. https:\/\/doi.org\/10. 21437\/Interspeech.2018--1929","journal-title":"Deep Speaker Recognition. In Proc. Interspeech"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2104.11560"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548137"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v8i1.14550"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1412.6980"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i03.5681"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1907.11692"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1209"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1046"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2925966"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-950"},{"key":"e_1_3_2_1_24_1","volume-title":"Attention bottlenecks for multimodal fusion. Advances in neural information processing systems 34","author":"Nagrani Arsha","year":"2021","unstructured":"Arsha Nagrani, Shan Yang, Anurag Arnab, Aren Jansen, Cordelia Schmid, and Chen Sun. 2021. Attention bottlenecks for multimodal fusion. Advances in neural information processing systems 34 (2021), 14200--14213."},{"key":"e_1_3_2_1_25_1","volume-title":"Garnett (Eds.)","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/ file\/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00806"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1303"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3634706"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.06.004"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.374"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1706.03762"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3183830"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01271"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33017216"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2208.09579"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683120"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3275156"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096655"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.421"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413690"},{"key":"e_1_3_2_1_46_1","volume-title":"Xlnet: Generalized autoregressive pretraining for language understanding. Advances in neural information processing systems 32","author":"Yang Zhilin","year":"2019","unstructured":"Zhilin Yang, Zihang Dai, Yiming Yang, Jaime Carbonell, Russ R Salakhutdinov, and Quoc V Le. 2019. Xlnet: Generalized autoregressive pretraining for language understanding. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.341"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2306.15796"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1115"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1606.06259"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1139"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2202.12093"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.242"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.466"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681163","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681163","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681163"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":56,"alternative-id":["10.1145\/3664647.3681163","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681163","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}