{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,25]],"date-time":"2025-12-25T22:26:53Z","timestamp":1766701613808,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":69,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62162003"],"award-info":[{"award-number":["62162003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Central Guiding Local Technology Development Fund","award":["GuikeZY24212059"],"award-info":[{"award-number":["GuikeZY24212059"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754959","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:56:43Z","timestamp":1761371803000},"page":"189-198","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Object-Preserving Counterfactual Diffusion Augmentation for Single-Domain Generalized Object Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2687-6505","authenticated-orcid":false,"given":"Hongda","family":"Qin","sequence":"first","affiliation":[{"name":"School of Electrical Engineering, Guangxi University, Nanning, Guangxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0880-0160","authenticated-orcid":false,"given":"Xiao","family":"Lu","sequence":"additional","affiliation":[{"name":"College of Engineering and Design, Hunan Normal University, Changsha, Hunan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5422-1116","authenticated-orcid":false,"given":"Zhiyong","family":"Wei","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, Guangxi University, Nanning, Guangxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0187-6760","authenticated-orcid":false,"given":"Ningjiang","family":"Chen","sequence":"additional","affiliation":[{"name":"Graduate School, Guangxi University, Nanning, Guangxi, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"e_1_3_2_1_2_1","volume-title":"End-to-End Object Detection with Transformers. In European Conference on Computer Vision. 213-229","author":"Carion Nicolas","year":"2020","unstructured":"Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. 2020. End-to-End Object Detection with Transformers. In European Conference on Computer Vision. 213-229."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00889"},{"key":"e_1_3_2_1_4_1","volume-title":"GeoDiffusion: Text-Prompted Geometric Control for Object Detection Data Generation. In The Twelfth International Conference on Learning Representations.","author":"Chen Kai","year":"2024","unstructured":"Kai Chen, Enze Xie, Zhe Chen, Yibo Wang, Lanqing HONG, Zhenguo Li, and Dit-Yan Yeung. 2024b. GeoDiffusion: Text-Prompted Geometric Control for Object Detection Data Generation. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00352"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02218"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01439"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01141"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01679"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02282"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01707"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00811"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00129"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-71704-9_65"},{"key":"e_1_3_2_1_16_1","first-page":"1180","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","volume":"37","author":"Ganin Yaroslav","year":"2015","unstructured":"Yaroslav Ganin and Victor Lempitsky. 2015. Unsupervised Domain Adaptation by Backpropagation. In Proceedings of the 32nd International Conference on Machine Learning, Vol. 37. Lille, France, 1180-1189."},{"key":"e_1_3_2_1_17_1","volume-title":"International Conference on Learning Representations.","author":"Geirhos Robert","year":"2019","unstructured":"Robert Geirhos, Patricia Rubisch, Claudio Michaelis, Matthias Bethge, Felix A. Wichmann, and Wieland Brendel. 2019. ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness.. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_18_1","volume-title":"Julio Delgado Mangas, and Luc Van Gool","author":"Gong Rui","year":"2023","unstructured":"Rui Gong, Martin Danelljan, Han Sun, Julio Delgado Mangas, and Luc Van Gool. 2023. Prompting diffusion representations for cross-domain semantic segmentation. arXiv preprint arXiv:2307.02138 (2023)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00258"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680962"},{"key":"e_1_3_2_1_21_1","volume-title":"Bassel Al Omari, Soufiane Lamghari, Yasser H. Khalil, Xi Chen, and Guojun Zhang.","author":"Hemati Sobhan","year":"2024","unstructured":"Sobhan Hemati, Mahdi Beitollahi, Amir Hossein Estiri, Bassel Al Omari, Soufiane Lamghari, Yasser H. Khalil, Xi Chen, and Guojun Zhang. 2024. Beyond Loss Functions: Exploring Data-Centric Approaches with Diffusion Model for Domain Generalization. Transactions on Machine Learning Research (2024)."},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Learning Representations.","author":"Hendrycks Dan","year":"2020","unstructured":"Dan Hendrycks, Norman Mu, Ekin Dogus Cubuk, Barret Zoph, Justin Gilmer, and Balaji Lakshminarayanan. 2020. AugMix: A Simple Method to Improve Robustness and Uncertainty under Data Shift. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_23_1","first-page":"8633","article-title":"Video Diffusion Models","volume":"35","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, Tim Salimans, Alexey Gritsenko, William Chan, Mohammad Norouzi, and David J Fleet. 2022. Video Diffusion Models. In Advances in Neural Information Processing Systems, Vol. 35. 8633-8646.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","volume-title":"European Conference on Computer Vision. 666-670","author":"Nazari Narges Honarvar","year":"2020","unstructured":"Narges Honarvar Nazari and Adriana Kovashka. 2020. Domain generalization using shape representation. In European Conference on Computer Vision. 666-670."},{"key":"e_1_3_2_1_25_1","first-page":"13213","volume-title":"Proceedings of the 40th International Conference on Machine Learning","volume":"202","author":"Hoogeboom Emiel","year":"2023","unstructured":"Emiel Hoogeboom, Jonathan Heek, and Tim Salimans. 2023. simple diffusion: End-to-end diffusion for high resolution images. In Proceedings of the 40th International Conference on Machine Learning, Vol. 202. 13213-13232."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00501"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72940-9_27"},{"key":"e_1_3_2_1_28_1","volume-title":"European Conference on Computer Vision. 91-109","author":"Jia Yuru","year":"2024","unstructured":"Yuru Jia, Lukas Hoyer, Shengyu Huang, Tianfu Wang, Luc Van Gool, Konrad Schindler, and Anton Obukhov. 2024. Dginstyle: Domain-generalizable semantic segmentation with image diffusion models and stylized semantic control. In European Conference on Computer Vision. 91-109."},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 16541-16550","author":"Kennerley Mikhail","key":"e_1_3_2_1_29_1","unstructured":"Mikhail Kennerley, Jian-Gang Wang, Bharadwaj Veeravalli, and Robby T. Tan. 2024. CAT: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 16541-16550."},{"key":"e_1_3_2_1_30_1","first-page":"18661","article-title":"Supervised Contrastive Learning","volume":"33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised Contrastive Learning. In Advances in Neural Information Processing Systems, Vol. 33. 18661-18673.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i4.28076"},{"key":"e_1_3_2_1_32_1","first-page":"30146","article-title":"BLIP-Diffusion: Pre-trained Subject Representation for Controllable Text-to-Image Generation and Editing","volume":"36","author":"Junnan Li DONGXU LI","year":"2023","unstructured":"DONGXU LI, Junnan Li, and Steven Hoi. 2023. BLIP-Diffusion: Pre-trained Subject Representation for Controllable Text-to-Image Generation and Editing. In Advances in Neural Information Processing Systems, Vol. 36. 30146-30166.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_33_1","volume-title":"Object Style Diffusion for Generalized Object Detection in Urban Scene. arXiv preprint arXiv:2412.13815","author":"Li Hao","year":"2024","unstructured":"Hao Li, Xiangyuan Yang, Mengzhu Wang, Long Lan, Ke Liang, Xinwang Liu, and Kenli Li. 2024. Object Style Diffusion for Generalized Object Detection in Urban Scene. arXiv preprint arXiv:2412.13815 (2024)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00029"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02724"},{"key":"e_1_3_2_1_36_1","first-page":"2208","volume-title":"Deep Transfer Learning with Joint Adaptation Networks. In Proceedings of the 34th International Conference on Machine Learning","volume":"70","author":"Long Mingsheng","unstructured":"Mingsheng Long, Han Zhu, Jianmin Wang, and Michael I. Jordan. 2017. Deep Transfer Learning with Joint Adaptation Networks. In Proceedings of the 34th International Conference on Machine Learning, Vol. 70. 2208-2217."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612191"},{"key":"e_1_3_2_1_38_1","volume-title":"Instance Adaptive Self-training for Unsupervised Domain Adaptation. In European Conference on Computer Vision. 415-430","author":"Mei Ke","year":"2020","unstructured":"Ke Mei, Chuang Zhu, Jiaqi Zou, and Shanghang Zhang. 2020. Instance Adaptive Self-training for Unsupervised Domain Adaptation. In European Conference on Computer Vision. 415-430."},{"key":"e_1_3_2_1_39_1","volume-title":"Benchmarking robustness in object detection: Autonomous driving when winter is coming. arXiv preprint arXiv:1907.07484","author":"Michaelis Claudio","year":"2019","unstructured":"Claudio Michaelis, Benjamin Mitzkus, Robert Geirhos, Evgenia Rusak, Oliver Bringmann, Alexander S Ecker, Matthias Bethge, and Wieland Brendel. 2019. Benchmarking robustness in object detection: Autonomous driving when winter is coming. arXiv preprint arXiv:1907.07484 (2019)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00281"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3217046"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_29"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00195"},{"key":"e_1_3_2_1_44_1","volume-title":"Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Doll\u00e1r, and Christoph Feichtenhofer.","author":"Ravi Nikhila","year":"2024","unstructured":"Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman R\u00e4dle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Doll\u00e1r, and Christoph Feichtenhofer. 2024. SAM 2: Segment Anything in Images and Videos. arXiv preprint arXiv:2408.00714 (2024)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00727"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00705"},{"key":"e_1_3_2_1_51_1","first-page":"1363","article-title":"Emergent Correspondence from Image Diffusion","volume":"36","author":"Tang Luming","year":"2023","unstructured":"Luming Tang, Menglin Jia, Qianqian Wang, Cheng Perng Phoo, and Bharath Hariharan. 2023. Emergent Correspondence from Image Diffusion. In Advances in Neural Information Processing Systems, Vol. 36. 1363-1389.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00972"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.316"},{"key":"e_1_3_2_1_54_1","volume-title":"Deep domain confusion: Maximizing for domain invariance. arXiv preprint arXiv:1412.3474","author":"Tzeng Eric","year":"2014","unstructured":"Eric Tzeng, Judy Hoffman, Ning Zhang, Kate Saenko, and Trevor Darrell. 2014. Deep domain confusion: Maximizing for domain invariance. arXiv preprint arXiv:1412.3474 (2014)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00314"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3178128"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00596"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00092"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i20.35487"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02209"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_63_1","first-page":"11127","article-title":"Uni-ControlNet: All-in-One Control to Text-to-Image Diffusion Models","volume":"36","author":"Zhao Shihao","year":"2023","unstructured":"Shihao Zhao, Dongdong Chen, Yen-Chun Chen, Jianmin Bao, Shaozhe Hao, Lu Yuan, and Kwan-Yee K. Wong. 2023. Uni-ControlNet: All-in-One Control to Text-to-Image Diffusion Models. In Advances in Neural Information Processing Systems, Vol. 36. 11127-11150.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_64_1","first-page":"16096","article-title":"Domain Generalization via Entropy Regularization","volume":"33","author":"Zhao Shanshan","year":"2020","unstructured":"Shanshan Zhao, Mingming Gong, Tongliang Liu, Huan Fu, and Dacheng Tao. 2020. Domain Generalization via Entropy Regularization. In Advances in Neural Information Processing Systems, Vol. 33. 16096-16107.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3195549"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01913-8"},{"volume-title":"Learning Data Augmentation Strategies for Object Detection. In European Conference on Computer Vision. 566-583","author":"Zoph Barret","key":"e_1_3_2_1_67_1","unstructured":"Barret Zoph, Ekin D. Cubuk, Golnaz Ghiasi, Tsung-Yi Lin, Jonathon Shlens, and Quoc V. Le. 2020. Learning Data Augmentation Strategies for Object Detection. In European Conference on Computer Vision. 566-583."},{"key":"e_1_3_2_1_68_1","volume-title":"ProGBA: Prompt Guided Bayesian Augmentation for Zero-Shot Domain Adaptation. In European Conference on Computer Vision. 241-261","author":"Zou Jian","year":"2025","unstructured":"Jian Zou, Guanglei Yang, Tao Luo, Chun-Mei Feng, and Wangmeng Zuo. 2025. ProGBA: Prompt Guided Bayesian Augmentation for Zero-Shot Domain Adaptation. In European Conference on Computer Vision. 241-261."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00608"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754959","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:06:12Z","timestamp":1765339572000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754959"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":69,"alternative-id":["10.1145\/3746027.3754959","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754959","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}