{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:03:21Z","timestamp":1750309401058,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681056","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"9729-9738","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning in Order! A Sequential Strategy to Learn Invariant Features for Multimodal Sentiment Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5482-3895","authenticated-orcid":false,"given":"Xianbing","family":"Zhao","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7764-431X","authenticated-orcid":false,"given":"Lizhen","family":"Qu","sequence":"additional","affiliation":[{"name":"Monash University, Melbourne, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9490-7353","authenticated-orcid":false,"given":"Tao","family":"Feng","sequence":"additional","affiliation":[{"name":"Monash University, Melbourne, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9444-3763","authenticated-orcid":false,"given":"Jianfei","family":"Cai","sequence":"additional","affiliation":[{"name":"Monash University, Melbourne, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0271-8246","authenticated-orcid":false,"given":"Buzhou","family":"Tang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International Conference on Machine Learning. PMLR, 145--155","author":"Ahuja Kartik","year":"2020","unstructured":"Kartik Ahuja, Karthikeyan Shanmugam, Kush Varshney, and Amit Dhurandhar. 2020. Invariant risk minimization games. In International Conference on Machine Learning. PMLR, 145--155."},{"key":"e_1_3_2_1_2_1","volume-title":"Invariant risk minimization. arXiv preprint arXiv:1907.02893","author":"Arjovsky Martin","year":"2019","unstructured":"Martin Arjovsky, L\u00e9on Bottou, Ishaan Gulrajani, and David Lopez-Paz. 2019. Invariant risk minimization. arXiv preprint arXiv:1907.02893 (2019)."},{"key":"e_1_3_2_1_3_1","first-page":"23284","article-title":"Masktune: Mitigating spurious correlations by forcing to explore","volume":"35","author":"Asgari Saeid","year":"2022","unstructured":"Saeid Asgari, Aliasghar Khani, Fereshte Khani, Ali Gholami, Linh Tran, Ali Mahdavi Amiri, and Ghassan Hamarneh. 2022. Masktune: Mitigating spurious correlations by forcing to explore. Advances in Neural Information Processing Systems, Vol. 35 (2022), 23284--23296.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_4_1","volume-title":"Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond.","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond. (2023)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Yejin Bang Samuel Cahyawijaya Nayeon Lee Wenliang Dai Dan Su Bryan Wilie Holy Lovenia Ziwei Ji Tiezheng Yu Willy Chung et al. 2023. A multitask multilingual multimodal evaluation of ChatGPT on reasoning hallucination and interactivity. arXiv preprint arXiv:2302.04023 (2023).","DOI":"10.18653\/v1\/2023.ijcnlp-main.45"},{"key":"e_1_3_2_1_6_1","volume-title":"Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432","author":"Bengio Yoshua","year":"2013","unstructured":"Yoshua Bengio, Nicholas L\u00e9onard, and Aaron Courville. 2013. Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432 (2013)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20050-2_26"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings, Part IX 16","author":"Chattopadhyay Prithvijit","year":"2020","unstructured":"Prithvijit Chattopadhyay, Yogesh Balaji, and Judy Hoffman. 2020. Learning to balance specificity and invariance for in and out of domain generalization. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part IX 16. Springer, 301--318."},{"volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision. 1751--1760","author":"Chen Liang","key":"e_1_3_2_1_10_1","unstructured":"Liang Chen, Yong Zhang, Yibing Song, Anton van den Hengel, and Lingqiao Liu. 2023. Domain generalization via rationale invariance. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 1751--1760."},{"key":"e_1_3_2_1_11_1","volume-title":"Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555","author":"Clark Kevin","year":"2020","unstructured":"Kevin Clark, Minh-Thang Luong, Quoc V Le, and Christopher D Manning. 2020. Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555 (2020)."},{"key":"e_1_3_2_1_12_1","volume-title":"Junqi Zhao, Weisheng Wang, Boyang Li, Pascale N Fung, and Steven Hoi.","author":"Dai Wenliang","year":"2024","unstructured":"Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale N Fung, and Steven Hoi. 2024. Instructblip: Towards general-purpose vision-language models with instruction tuning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"key":"e_1_3_2_1_14_1","volume-title":"Improving multi-modal learning with uni-modal teachers. arXiv preprint arXiv:2106.11059","author":"Du Chenzhuang","year":"2021","unstructured":"Chenzhuang Du, Tingle Li, Yichen Liu, Zixin Wen, Tianyu Hua, Yue Wang, and Hang Zhao. 2021. Improving multi-modal learning with uni-modal teachers. arXiv preprint arXiv:2106.11059 (2021)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01918"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00561"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00561"},{"key":"e_1_3_2_1_18_1","volume-title":"IMO: Greedy Layer-Wise Sparse Representation Learning for Out-of-Distribution Text Classification with Pre-trained Models. arxiv: 2404.13504 [cs.CL] https:\/\/arxiv.org\/abs\/2404.13504","author":"Feng Tao","year":"2024","unstructured":"Tao Feng, Lizhen Qu, Zhuang Li, Haolan Zhan, Yuncheng Hua, and Gholamreza Haffari. 2024. IMO: Greedy Layer-Wise Sparse Representation Learning for Out-of-Distribution Text Classification with Pre-trained Models. arxiv: 2404.13504 [cs.CL] https:\/\/arxiv.org\/abs\/2404.13504"},{"key":"e_1_3_2_1_19_1","volume-title":"Louis Philippe Morency, and Soujanya Poria","author":"Han Wei","year":"2021","unstructured":"Wei Han, Hui Chen, Alexander Gelbukh, Amir Zadeh, Louis Philippe Morency, and Soujanya Poria. 2021. Bi-Bimodal Modality Fusion for Correlation-Controlled Multimodal Sentiment Analysis. In ICMI 2021-Proceedings of the 2021 International Conference on Multimodal Interaction. Association for Computing Machinery, Inc, 6--15."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_2_1_21_1","volume-title":"Nonlinear causal discovery with additive noise models. Advances in neural information processing systems","author":"Hoyer Patrik","year":"2008","unstructured":"Patrik Hoyer, Dominik Janzing, Joris M Mooij, Jonas Peters, and Bernhard Sch\u00f6lkopf. 2008. Nonlinear causal discovery with additive noise models. Advances in neural information processing systems, Vol. 21 (2008)."},{"key":"e_1_3_2_1_22_1","first-page":"11450","article-title":"Improving multi-task generalization via regularizing spurious correlation","volume":"35","author":"Hu Ziniu","year":"2022","unstructured":"Ziniu Hu, Zhe Zhao, Xinyang Yi, Tiansheng Yao, Lichan Hong, Yizhou Sun, and Ed Chi. 2022. Improving multi-task generalization via regularizing spurious correlation. Advances in Neural Information Processing Systems, Vol. 35 (2022), 11450--11466.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","volume-title":"Binarized neural networks. Advances in neural information processing systems","author":"Hubara Itay","year":"2016","unstructured":"Itay Hubara, Matthieu Courbariaux, Daniel Soudry, Ran El-Yaniv, and Yoshua Bengio. 2016. Binarized neural networks. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_24_1","volume-title":"Dynamic Sparse Training: Find Efficient Sparse Network From Scratch With Trainable Masked Layers. In International Conference on Learning Representations.","author":"Junjie LIU","year":"2019","unstructured":"LIU Junjie, XU Zhe, SHI Runbin, Ray CC Cheung, and Hayden KH So. 2019. Dynamic Sparse Training: Find Efficient Sparse Network From Scratch With Trainable Masked Layers. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_25_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_26_1","volume-title":"International Conference on Machine Learning. PMLR, 5544--5555","author":"Kusupati Aditya","year":"2020","unstructured":"Aditya Kusupati, Vivek Ramanujan, Raghav Somani, Mitchell Wortsman, Prateek Jain, Sham Kakade, and Ali Farhadi. 2020. Soft threshold weight reparameterization for learnable sparsity. In International Conference on Machine Learning. PMLR, 5544--5555."},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730--19742."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_38"},{"key":"e_1_3_2_1_29_1","volume-title":"Super tickets in pre-trained language models: From model compression to improving generalization. arXiv preprint arXiv:2105.12002","author":"Liang Chen","year":"2021","unstructured":"Chen Liang, Simiao Zuo, Minshuo Chen, Haoming Jiang, Xiaodong Liu, Pengcheng He, Tuo Zhao, and Weizhu Chen. 2021. Super tickets in pre-trained language models: From model compression to improving generalization. arXiv preprint arXiv:2105.12002 (2021)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i7.26059"},{"key":"e_1_3_2_1_31_1","volume-title":"Information recovery-driven deep incomplete multiview clustering network","author":"Liu Chengliang","year":"2023","unstructured":"Chengliang Liu, Jie Wen, Zhihao Wu, Xiaoling Luo, Chao Huang, and Yong Xu. 2023. Information recovery-driven deep incomplete multiview clustering network. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3194332"},{"key":"e_1_3_2_1_33_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Ray CC Cheung, and Hayden KH So","author":"Liu Junjie","year":"2020","unstructured":"Junjie Liu, Zhe Xu, Runbin Shi, Ray CC Cheung, and Hayden KH So. 2020. Dynamic sparse training: Find efficient sparse network from scratch with trainable masked layers. arXiv preprint arXiv:2005.06870 (2020)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1209"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00258"},{"key":"e_1_3_2_1_37_1","volume-title":"Curriculum Learning Meets Weakly Supervised Modality Correlation Learning. arXiv preprint arXiv:2212.07619","author":"Mai Sijie","year":"2022","unstructured":"Sijie Mai, Ya Sun, and Haifeng Hu. 2022. Curriculum Learning Meets Weakly Supervised Modality Correlation Learning. arXiv preprint arXiv:2212.07619 (2022)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00934"},{"key":"e_1_3_2_1_39_1","volume-title":"International conference on machine learning. PMLR, 10--18","author":"Muandet Krikamol","year":"2013","unstructured":"Krikamol Muandet, David Balduzzi, and Bernhard Sch\u00f6lkopf. 2013. Domain generalization via invariant feature representation. In International conference on machine learning. PMLR, 10--18."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1111\/rssb.12167"},{"key":"e_1_3_2_1_41_1","unstructured":"Jonas Peters Joris M Mooij Dominik Janzing and Bernhard Sch\u00f6lkopf. 2014. Causal discovery with continuous additive noise models. (2014)."},{"key":"e_1_3_2_1_42_1","first-page":"1256","article-title":"Gradient starvation: A learning proclivity in neural networks","volume":"34","author":"Pezeshki Mohammad","year":"2021","unstructured":"Mohammad Pezeshki, Oumar Kaba, Yoshua Bengio, Aaron C Courville, Doina Precup, and Guillaume Lajoie. 2021. Gradient starvation: A learning proclivity in neural networks. Advances in Neural Information Processing Systems, Vol. 34 (2021), 1256--1272.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","first-page":"1256","article-title":"Gradient starvation: A learning proclivity in neural networks","volume":"34","author":"Pezeshki Mohammad","year":"2021","unstructured":"Mohammad Pezeshki, Oumar Kaba, Yoshua Bengio, Aaron C Courville, Doina Precup, and Guillaume Lajoie. 2021. Gradient starvation: A learning proclivity in neural networks. Advances in Neural Information Processing Systems, Vol. 34 (2021), 1256--1272.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_44_1","volume-title":"Meld: A multimodal multi-party dataset for emotion recognition in conversations. arXiv preprint arXiv:1810.02508","author":"Poria Soujanya","year":"2018","unstructured":"Soujanya Poria, Devamanyu Hazarika, Navonil Majumder, Gautam Naik, Erik Cambria, and Rada Mihalcea. 2018. Meld: A multimodal multi-party dataset for emotion recognition in conversations. arXiv preprint arXiv:1810.02508 (2018)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02312"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02312"},{"key":"e_1_3_2_1_47_1","volume-title":"International Conference on Machine Learning. PMLR, 28468--28491","author":"Quinzan Francesco","year":"2023","unstructured":"Francesco Quinzan, Ashkan Soleymani, Patrick Jaillet, Cristian R Rojas, and Stefan Bauer. 2023. Drcfs: Doubly robust causal feature selection. In International Conference on Machine Learning. PMLR, 28468--28491."},{"key":"e_1_3_2_1_48_1","volume-title":"International Conference on Machine Learning. PMLR, 28468--28491","author":"Quinzan Francesco","year":"2023","unstructured":"Francesco Quinzan, Ashkan Soleymani, Patrick Jaillet, Cristian R Rojas, and Stefan Bauer. 2023. Drcfs: Doubly robust causal feature selection. In International Conference on Machine Learning. PMLR, 28468--28491."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"e_1_3_2_1_50_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Rame Alexandre","year":"2022","unstructured":"Alexandre Rame, Corentin Dancette, and Matthieu Cord. 2022. Fishr: Invariant gradient variances for out-of-distribution generalization. In International Conference on Machine Learning. PMLR, 18347--18377."},{"key":"e_1_3_2_1_51_1","volume-title":"Dynamicvit: Efficient vision transformers with dynamic token sparsification. Advances in neural information processing systems","author":"Rao Yongming","year":"2021","unstructured":"Yongming Rao, Wenliang Zhao, Benlin Liu, Jiwen Lu, Jie Zhou, and Cho-Jui Hsieh. 2021. Dynamicvit: Efficient vision transformers with dynamic token sparsification. Advances in neural information processing systems, Vol. 34 (2021), 13937--13949."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_32"},{"key":"e_1_3_2_1_53_1","volume-title":"On causal and anticausal learning. arXiv preprint arXiv:1206.6471","author":"Sch\u00f6lkopf Bernhard","year":"2012","unstructured":"Bernhard Sch\u00f6lkopf, Dominik Janzing, Jonas Peters, Eleni Sgouritsa, Kun Zhang, and Joris Mooij. 2012. On causal and anticausal learning. arXiv preprint arXiv:1206.6471 (2012)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3058954"},{"key":"e_1_3_2_1_55_1","volume-title":"Proceedings of the conference. Association for Computational Linguistics. Meeting","volume":"2019","author":"Hubert Tsai Yao-Hung","year":"2019","unstructured":"Yao-Hung Hubert Tsai, Shaojie Bai, Paul Pu Liang, J Zico Kolter, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2019. Multimodal transformer for unaligned multimodal language sequences. In Proceedings of the conference. Association for Computational Linguistics. Meeting, Vol. 2019. NIH Public Access, 6558."},{"key":"e_1_3_2_1_56_1","volume-title":"Learning Factorized Multimodal Representations. In International Conference on Representation Learning.","author":"Hubert Tsai Yao-Hung","year":"2019","unstructured":"Yao-Hung Hubert Tsai, Paul Pu Liang, Amir Zadeh, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2019. Learning Factorized Multimodal Representations. In International Conference on Representation Learning."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00807"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109259"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3183830"},{"key":"e_1_3_2_1_60_1","volume-title":"Dual-perspective fusion network for aspect-based multimodal sentiment analysis","author":"Wang Di","year":"2023","unstructured":"Di Wang, Changning Tian, Xiao Liang, Lin Zhao, Lihuo He, and Quan Wang. 2023. Dual-perspective fusion network for aspect-based multimodal sentiment analysis. IEEE Transactions on Multimedia (2023)."},{"key":"e_1_3_2_1_61_1","volume-title":"Generalizing to unseen domains: A survey on domain generalization","author":"Wang Jindong","year":"2022","unstructured":"Jindong Wang, Cuiling Lan, Chang Liu, Yidong Ouyang, Tao Qin, Wang Lu, Yiqiang Chen, Wenjun Zeng, and Philip Yu. 2022. Generalizing to unseen domains: A survey on domain generalization. IEEE Transactions on Knowledge and Data Engineering (2022)."},{"key":"e_1_3_2_1_62_1","volume-title":"Eda: Easy data augmentation techniques for boosting performance on text classification tasks. arXiv preprint arXiv:1901.11196","author":"Wei Jason","year":"2019","unstructured":"Jason Wei and Kai Zou. 2019. Eda: Easy data augmentation techniques for boosting performance on text classification tasks. arXiv preprint arXiv:1901.11196 (2019)."},{"key":"e_1_3_2_1_63_1","volume-title":"Unsupervised data augmentation for consistency training. Advances in neural information processing systems","author":"Xie Qizhe","year":"2020","unstructured":"Qizhe Xie, Zihang Dai, Eduard Hovy, Thang Luong, and Quoc Le. 2020. Unsupervised data augmentation for consistency training. Advances in neural information processing systems, Vol. 33 (2020), 6256--6268."},{"key":"e_1_3_2_1_64_1","unstructured":"Zhe Xu and Ray CC Cheung. [n. d.]. Accurate and Compact Convolutional Neural Networks with Trained Binarization. ( [n. d.])."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547754"},{"key":"e_1_3_2_1_66_1","first-page":"19448","article-title":"Adversarial teacher-student representation learning for domain generalization","volume":"34","author":"Yang Fu-En","year":"2021","unstructured":"Fu-En Yang, Yuan-Chia Cheng, Zu-Yun Shiau, and Yu-Chiang Frank Wang. 2021. Adversarial teacher-student representation learning for domain generalization. Advances in Neural Information Processing Systems, Vol. 34 (2021), 19448--19460.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.79"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.421"},{"key":"e_1_3_2_1_69_1","volume-title":"MM-BigBench: Evaluating Multimodal Models on Multimodal Content Comprehension Tasks. arXiv preprint arXiv:2310.09036","author":"Yang Xiaocui","year":"2023","unstructured":"Xiaocui Yang, Wenfang Wu, Shi Feng, Ming Wang, Daling Wang, Yang Li, Qi Sun, Yifei Zhang, Xiaoming Fu, and Soujanya Poria. 2023. MM-BigBench: Evaluating Multimodal Models on Multimodal Content Comprehension Tasks. arXiv preprint arXiv:2310.09036 (2023)."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1115"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"e_1_3_2_1_72_1","volume-title":"Mosi: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv preprint arXiv:1606.06259","author":"Zadeh Amir","year":"2016","unstructured":"Amir Zadeh, Rowan Zellers, Eli Pincus, and Louis-Philippe Morency. 2016. Mosi: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv preprint arXiv:1606.06259 (2016)."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1208"},{"key":"e_1_3_2_1_74_1","volume-title":"Learning Language-guided Adaptive Hyper-modality Representation for Multimodal Sentiment Analysis. arXiv preprint arXiv:2310.05804","author":"Zhang Haoyu","year":"2023","unstructured":"Haoyu Zhang, Yu Wang, Guanghao Yin, Kejun Liu, Yuanyuan Liu, and Tianshu Yu. 2023. Learning Language-guided Adaptive Hyper-modality Representation for Multimodal Sentiment Analysis. arXiv preprint arXiv:2310.05804 (2023)."},{"key":"e_1_3_2_1_75_1","volume-title":"MAG: An Extended Multimodal Adaptation Gate for Multimodal Sentiment Analysis. In ICASSP 2022--2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 4753--4757","author":"Zhao Xianbing","year":"2022","unstructured":"Xianbing Zhao, Yixin Chen, Wanting Li, Lei Gao, and Buzhou Tang. 2022. MAG: An Extended Multimodal Adaptation Gate for Multimodal Sentiment Analysis. In ICASSP 2022--2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 4753--4757."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3222023"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583406"},{"key":"e_1_3_2_1_78_1","volume-title":"Dorefa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160","author":"Zhou Shuchang","year":"2016","unstructured":"Shuchang Zhou, Yuxin Wu, Zekun Ni, Xinyu Zhou, He Wen, and Yuheng Zou. 2016. Dorefa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160 (2016)."},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612468"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681056","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681056","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:52Z","timestamp":1750294672000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681056"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":79,"alternative-id":["10.1145\/3664647.3681056","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681056","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}