{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:09:49Z","timestamp":1765339789786,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","funder":[{"name":"National Key R&D Program of China","award":["2024YFB3311600"],"award-info":[{"award-number":["2024YFB3311600"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272422;U22B2051"],"award-info":[{"award-number":["62272422;U22B2051"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754954","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:56:43Z","timestamp":1761371803000},"page":"180-188","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Robust Multimodal Domain Generalization via Modality-Domain Joint Adversarial Training"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6611-7896","authenticated-orcid":false,"given":"Hongzhao","family":"Li","sequence":"first","affiliation":[{"name":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1764-1740","authenticated-orcid":false,"given":"Hualei","family":"Wan","sequence":"additional","affiliation":[{"name":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0933-7285","authenticated-orcid":false,"given":"Liangzhi","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4868-0709","authenticated-orcid":false,"given":"Mingyuan","family":"Jiu","sequence":"additional","affiliation":[{"name":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5823-2037","authenticated-orcid":false,"given":"Shupan","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6885-3451","authenticated-orcid":false,"given":"Mingliang","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer and Artificial Intelligence, Zhengzhou University, Zhengzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9746-276X","authenticated-orcid":false,"given":"Muhammad Haris","family":"Khan","sequence":"additional","affiliation":[{"name":"Mohamed Bin Zayed University of Artificial Intelligence, Abu Dhabi, United Arab Emirates"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Metareg: Towards domain generalization using meta-regularization. Advances in neural information processing systems","author":"Balaji Yogesh","year":"2018","unstructured":"Yogesh Balaji, Swami Sankaranarayanan, and Rama Chellappa. 2018. Metareg: Towards domain generalization using meta-regularization. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00233"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053174"},{"key":"e_1_3_2_1_4_1","unstructured":"MMAction2 Contributors. 2020. OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmaction2."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the European conference on computer vision (ECCV). 720-736","author":"Damen Dima","year":"2018","unstructured":"Dima Damen, Hazel Doughty, Giovanni Maria Farinella, Sanja Fidler, Antonino Furnari, Evangelos Kazakos, Davide Moltisanti, Jonathan Munro, Toby Perrett, Will Price, et al., 2018. Scaling egocentric vision: The epic-kitchens dataset. In Proceedings of the European conference on computer vision (ECCV). 720-736."},{"key":"e_1_3_2_1_6_1","volume-title":"European Conference on Computer Vision.","author":"Dong Hao","year":"2024","unstructured":"Hao Dong, Eleni Chatzi, and Olga Fink. 2024. Towards Multimodal Open-Set Domain Generalization and Adaptation through Self-supervision. In European Conference on Computer Vision."},{"key":"e_1_3_2_1_7_1","volume-title":"Advances in Multimodal Adaptation and Generalization: From Traditional Approaches to Foundation Models. arXiv preprint arXiv:2501.18592","author":"Dong Hao","year":"2025","unstructured":"Hao Dong, Moru Liu, Kaiyang Zhou, Eleni Chatzi, Juho Kannala, Cyrill Stachniss, and Olga Fink. 2025. Advances in Multimodal Adaptation and Generalization: From Traditional Approaches to Foundation Models. arXiv preprint arXiv:2501.18592 (2025)."},{"key":"e_1_3_2_1_8_1","first-page":"78674","article-title":"SimMMDG: A simple and effective framework for multi-modal domain generalization","volume":"36","author":"Dong Hao","year":"2023","unstructured":"Hao Dong, Ismail Nejjar, Han Sun, Eleni Chatzi, and Olga Fink. 2023. SimMMDG: A simple and effective framework for multi-modal domain generalization. Advances in Neural Information Processing Systems, Vol. 36 (2023), 78674-78695.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","volume-title":"Konstantinos Kamnitsas, and Ben Glocker.","author":"Dou Qi","year":"2019","unstructured":"Qi Dou, Daniel Coelho de Castro, Konstantinos Kamnitsas, and Ben Glocker. 2019. Domain generalization via model-agnostic learning of semantic features. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2133"},{"key":"e_1_3_2_1_11_1","first-page":"1","article-title":"Domain-adversarial training of neural networks","volume":"17","author":"Ganin Yaroslav","year":"2016","unstructured":"Yaroslav Ganin, Evgeniya Ustinova, Hana Ajakan, Pascal Germain, Hugo Larochelle, Fran\u00e7ois Laviolette, Mario March, and Victor Lempitsky. 2016. Domain-adversarial training of neural networks. Journal of machine learning research, Vol. 17, 59 (2016), 1-35.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_12_1","volume-title":"Javier Del Ser, and Shuo Li","author":"Gao Zhifan","year":"2022","unstructured":"Zhifan Gao, Saidi Guo, Chenchu Xu, Jinglin Zhang, Mingming Gong, Javier Del Ser, and Shuo Li. 2022. Multi-domain adversarial variational Bayesian inference for domain generalization. IEEE Transactions on Circuits and Systems for Video Technology (2022)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681488"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681437"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681498"},{"key":"e_1_3_2_1_16_1","unstructured":"Will Kay Joao Carreira Karen Simonyan Brian Zhang Chloe Hillier Sudheendra Vijayanarasimhan Fabio Viola Tim Green Trevor Back Paul Natsev et al. 2017. The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)."},{"key":"e_1_3_2_1_17_1","volume-title":"Salman Khan, and Fahad Shahbaz Khan.","author":"Khan Muhammad Haris","year":"2021","unstructured":"Muhammad Haris Khan, Syed Muhammad talha Zaidi, Salman Khan, and Fahad Shahbaz Khan. 2021. Mode-Guided Feature Augmentation for Domain Generalization.. In BMVC. 176."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00948"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00566"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02092-w"},{"key":"e_1_3_2_1_21_1","volume-title":"TIMA: Text-Image Mutual Awareness for Balancing Zero-Shot Adversarial Robustness and Generalization Ability. arXiv preprint arXiv:2405.17678","author":"Ma Fengji","year":"2024","unstructured":"Fengji Ma, Li Liu, and Hei Victor Cheng. 2024. TIMA: Text-Image Mutual Awareness for Balancing Zero-Shot Adversarial Robustness and Generalization Ability. arXiv preprint arXiv:2405.17678 (2024)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681422"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00024"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-01998-9"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the Asian Conference on Computer Vision. 3068-3085","author":"Sultana Maryam","year":"2022","unstructured":"Maryam Sultana, Muzammal Naseer, Muhammad Haris Khan, Salman Khan, and Fahad Shahbaz Khan. 2022. Self-distilled vision transformer for domain generalization. In Proceedings of the Asian Conference on Computer Vision. 3068-3085."},{"key":"e_1_3_2_1_26_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_27_1","volume-title":"Generalizing to unseen domains: A survey on domain generalization","author":"Wang Jindong","year":"2022","unstructured":"Jindong Wang, Cuiling Lan, Chang Liu, Yidong Ouyang, Tao Qin, Wang Lu, Yiqiang Chen, Wenjun Zeng, and S Yu Philip. 2022. Generalizing to unseen domains: A survey on domain generalization. IEEE transactions on knowledge and data engineering, Vol. 35, 8 (2022), 8052-8072."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02052-4"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681471"},{"key":"e_1_3_2_1_30_1","volume-title":"Cross-Domain Semantic Transfer for Domain Generalization. ACM Transactions on Multimedia Computing, Communications and Applications","author":"Wang Yan","year":"2025","unstructured":"Yan Wang, Hong Xie, Jinyang He, Xiaoyu Shi, and Mingsheng Shang. 2025. Cross-Domain Semantic Transfer for Domain Generalization. ACM Transactions on Multimedia Computing, Communications and Applications (2025)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28467"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02106-7"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00696"},{"key":"e_1_3_2_1_34_1","volume-title":"Domain generalization via entropy regularization. Advances in neural information processing systems","author":"Zhao Shanshan","year":"2020","unstructured":"Shanshan Zhao, Mingming Gong, Tongliang Liu, Huan Fu, and Dacheng Tao. 2020. Domain generalization via entropy regularization. Advances in neural information processing systems, Vol. 33 (2020), 16096-16107."},{"key":"e_1_3_2_1_35_1","first-page":"4396","article-title":"Domain generalization: A survey","volume":"45","author":"Zhou Kaiyang","year":"2022","unstructured":"Kaiyang Zhou, Ziwei Liu, Yu Qiao, Tao Xiang, and Chen Change Loy. 2022. Domain generalization: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 45, 4 (2022), 4396-4415.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7003"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_33"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00697"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754954","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:06:36Z","timestamp":1765339596000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754954"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":38,"alternative-id":["10.1145\/3746027.3754954","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754954","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}