{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:14:25Z","timestamp":1765008865615,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":92,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1145\/3743093.3770946","type":"proceedings-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:06:16Z","timestamp":1765008376000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Dual-Space Adaptive Fusion for Self-supervised Text-guided Image Editing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5919-620X","authenticated-orcid":false,"given":"Qingyang","family":"Liu","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3627-0681","authenticated-orcid":false,"given":"Haonan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1970-8634","authenticated-orcid":false,"given":"Li","family":"Niu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_41"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00843"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00846"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_2"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02062"},{"key":"e_1_3_3_1_10_2","unstructured":"Ruoxi Chen Haibo Jin Jinyin Chen and Lichao Sun. 2023. EditShield: Protecting Unauthorized Image Editing by Instruction-guided Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.12066 (2023)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00606"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02078"},{"key":"e_1_3_3_1_13_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Cho Hansam","unstructured":"Hansam Cho, Jonghyun Lee, Seoung\u00a0Bum Kim, Tae-Hyun Oh, and Yonghyun Jeong. [n. d.]. Noise Map Guidance: Inversion with Spatial Context for Real Image Editing. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_36"},{"key":"e_1_3_3_1_15_2","unstructured":"Guillaume Couairon Jakob Verbeek Holger Schwenk and Matthieu Cord. 2022. Diffedit: Diffusion-based semantic image editing with mask guidance. CoRR abs\/2210.11427 (2022)."},{"key":"e_1_3_3_1_16_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Fu Tsu-Jui","unstructured":"Tsu-Jui Fu, Wenze Hu, Xianzhi Du, William\u00a0Yang Wang, Yinfei Yang, and Zhe Gan. [n. d.]. Guiding Instruction-based Image Editing via Multimodal Large Language Models. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Rinon Gal Or Patashnik Haggai Maron Amit\u00a0H Bermano Gal Chechik and Daniel Cohen-Or. 2022. StyleGAN-NADA: CLIP-guided domain adaptation of image generators. ACM TOG 41 4 (2022) 1\u201313.","DOI":"10.1145\/3528223.3530164"},{"key":"e_1_3_3_1_18_2","unstructured":"Jing Gu Yilin Wang Nanxuan Zhao Wei Xiong Qing Liu Zhifei Zhang He Zhang Jianming Zhang HyunJoon Jung and Xin\u00a0Eric Wang. 2024. Swapanything: Enabling arbitrary object swapping in personalized visual editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.05717 (2024)."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657444"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00667"},{"key":"e_1_3_3_1_21_2","volume-title":"The Eleventh International Conference on Learning Representations","author":"Hertz Amir","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-or. [n. d.]. Prompt-to-Prompt Image Editing with Cross-Attention Control. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_3_1_22_2","unstructured":"Amir Hertz Ron Mokady Jay Tenenbaum Kfir Aberman Yael Pritch and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control.(2022). CoRR abs\/2208.01626 (2022)."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"e_1_3_3_1_24_2","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. NeurIPS 30 (2017)."},{"key":"e_1_3_3_1_25_2","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. NeurIPS 33 (2020) 6840\u20136851."},{"key":"e_1_3_3_1_26_2","unstructured":"Ming-Yang Ho Che-Ming Wu Min-Sheng Wu and Yufeng\u00a0Jane Tseng. 2024. Every Pixel Has its Moments: Ultra-High-Resolution Unpaired Image-to-Image Translation via Dense Normalization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.04245 (2024)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01185"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"crossref","unstructured":"Yuming Jiang Nanxuan Zhao Qing Liu Krishna\u00a0Kumar Singh Shuai Yang Chen\u00a0Change Loy and Ziwei Liu. 2024. GroupDiff: Diffusion-based Group Portrait Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.14379 (2024).","DOI":"10.1007\/978-3-031-72754-2_13"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00859"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_43"},{"key":"e_1_3_3_1_31_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Ju Xuan","year":"2024","unstructured":"Xuan Ju, Ailing Zeng, Yuxuan Bian, Shaoteng Liu, and Qiang Xu. 2024. Pnp inversion: Boosting diffusion-based editing with 3 lines of code. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Wonjun Kang Kevin Galim and Hyung\u00a0Il Koo. 2024. Eta Inversion: Designing an Optimal Eta Function for Diffusion-based Real Image Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.09468 (2024).","DOI":"10.1007\/978-3-031-72630-9_6"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01778"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_3_1_37_2","unstructured":"Eungbean Lee Somi Jeong and Kwanghoon Sohn. 2024. EBDM: Exemplar-guided Image Translation with Brownian-bridge Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.09802 (2024)."},{"key":"e_1_3_3_1_38_2","unstructured":"Hyunsoo Lee Minsoo Kang and Bohyung Han. 2024. Conditional score guidance for text-driven image-to-image translation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_1_39_2","unstructured":"Junsung Lee Minsoo Kang and Bohyung Han. 2024. Diffusion-Based Image-to-Image Translation by Noise Correction via Prompt Interpolation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.08077 (2024)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00194"},{"key":"e_1_3_3_1_41_2","unstructured":"Dongxu Li Junnan Li and Steven Hoi. 2024. Blip-diffusion: Pre-trained subject representation for controllable text-to-image generation and editing. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_1_42_2","first-page":"19730","volume-title":"International conference on machine learning","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730\u201319742."},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00598"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00747"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01247"},{"key":"e_1_3_3_1_47_2","unstructured":"Shilong Liu Zhaoyang Zeng Tianhe Ren Feng Li Hao Zhang Jie Yang Chunyuan Li Jianwei Yang Hang Su Jun Zhu et\u00a0al. 2023. Grounding dino: Marrying dino with grounded pre-training for open-set object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.05499 (2023)."},{"key":"e_1_3_3_1_48_2","unstructured":"Chenlin Meng Yutong He Yang Song Jiaming Song Jiajun Wu Jun-Yan Zhu and Stefano Ermon. 2021. Sdedit: Guided image synthesis and editing with stochastic differential equations. CoRR abs\/2108.01073 (2021)."},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72920-1_7"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00811"},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"crossref","unstructured":"Chong Mou Xintao Wang Liangbin Xie Yanze Wu Jian Zhang Zhongang Qi Ying Shan and Xiaohu Qie. 2023. T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. CoRR abs\/2302.08453 (2023).","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72670-5_18"},{"key":"e_1_3_3_1_54_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Nie Shen","unstructured":"Shen Nie, Hanzhong\u00a0Allan Guo, Cheng Lu, Yuhao Zhou, Chenyu Zheng, and Chongxuan Li. [n. d.]. The Blessing of Randomness: SDE Beats ODE in General Diffusion-based Image Editing. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"crossref","unstructured":"Yotam Nitzan Zongze Wu Richard Zhang Eli Shechtman Daniel Cohen-Or Taesung Park and Micha\u00ebl Gharbi. 2024. Lazy Diffusion Transformer for Interactive Image Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.12382 (2024).","DOI":"10.1007\/978-3-031-72691-0_4"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591500"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01458"},{"key":"e_1_3_3_1_58_2","unstructured":"Yong-Hyun Park Mingi Kwon Jaewoong Choi Junghyo Jo and Youngjung Uh. 2023. Understanding the latent space of diffusion models through the lens of riemannian geometry. Advances in Neural Information Processing Systems 36 (2023) 24129\u201324142."},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591513"},{"key":"e_1_3_3_1_60_2","volume-title":"ICML","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In ICML."},{"key":"e_1_3_3_1_61_2","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.06125 1 2 (2022) 3."},{"key":"e_1_3_3_1_62_2","doi-asserted-by":"crossref","unstructured":"Yuxi Ren Jie Wu Yanzuo Lu Huafeng Kuang Xin Xia Xionghui Wang Qianqian Wang Yixing Zhu Pan Xie Shiyin Wang et\u00a0al. 2024. ByteEdit: Boost Comply and Accelerate Generative Image Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.04860 (2024).","DOI":"10.1007\/978-3-031-72646-0_11"},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01221"},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_3_1_66_2","unstructured":"Chitwan Saharia William Chan Saurabh Saxena Lala Li Jay Whang Emily\u00a0L Denton Kamyar Ghasemipour Raphael Gontijo\u00a0Lopes Burcu Karagol\u00a0Ayan Tim Salimans et\u00a0al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. NeurIPS 35 (2022) 36479\u201336494."},{"key":"e_1_3_3_1_67_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00847"},{"key":"e_1_3_3_1_68_2","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2020. Denoising diffusion implicit models. CoRR abs\/2010.02502 (2020)."},{"key":"e_1_3_3_1_69_2","unstructured":"Yang Song Jascha Sohl-Dickstein Diederik\u00a0P Kingma Abhishek Kumar Stefano Ermon and Ben Poole. 2020. Score-based generative modeling through stochastic differential equations. CoRR abs\/2011.13456 (2020)."},{"key":"e_1_3_3_1_70_2","unstructured":"Nikita Starodubcev Mikhail Khoroshikh Artem Babenko and Dmitry Baranchuk. 2024. Invertible Consistency Distillation for Text-Guided Image Editing in Around 7 Steps. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.14539 (2024)."},{"key":"e_1_3_3_1_71_2","volume-title":"The Eleventh International Conference on Learning Representations","author":"Su Xuan","unstructured":"Xuan Su, Jiaming Song, Chenlin Meng, and Stefano Ermon. [n. d.]. Dual Diffusion Implicit Bridges for Image-to-Image Translation. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_3_1_72_2","doi-asserted-by":"crossref","unstructured":"Vadim Titov Madina Khalmatova Alexandra Ivanova Dmitry Vetrov and Aibek Alanov. 2024. Guide-and-Rescale: Self-Guidance Mechanism for Effective Tuning-Free Real Image Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.01322 (2024).","DOI":"10.1007\/978-3-031-73209-6_14"},{"key":"e_1_3_3_1_73_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"e_1_3_3_1_74_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00687"},{"key":"e_1_3_3_1_75_2","unstructured":"Qian Wang Biao Zhang Michael Birsak and Peter Wonka. 2023. InstructEdit: Improving Automatic Masks for Diffusion-based Image Editing With User Instructions. CoRR abs\/2305.18047 (2023)."},{"key":"e_1_3_3_1_76_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01109"},{"key":"e_1_3_3_1_77_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01848"},{"key":"e_1_3_3_1_78_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00678"},{"key":"e_1_3_3_1_79_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00189"},{"key":"e_1_3_3_1_80_2","doi-asserted-by":"crossref","unstructured":"Wei Wu Qingnan Fan Shuai Qin Hong Gu Ruoyu Zhao and Antoni\u00a0B Chan. 2024. FreeDiff: Progressive Frequency Truncation for Image Editing with Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.11895 (2024).","DOI":"10.1007\/978-3-031-72652-1_12"},{"key":"e_1_3_3_1_81_2","unstructured":"Zongze Wu Nicholas Kolkin Jonathan Brandt Richard Zhang and Eli Shechtman. 2024. TurboEdit: Instant text-based image editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.08332 (2024)."},{"key":"e_1_3_3_1_82_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00903"},{"key":"e_1_3_3_1_83_2","unstructured":"Sihan Xu Ziqiao Ma Yidong Huang Honglak Lee and Joyce Chai. 2024. Cyclenet: Rethinking cycle consistency in text-guided diffusion for image manipulation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_1_84_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_3_1_85_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Yang Ling","unstructured":"Ling Yang, Zhilong Zhang, Zhaochen Yu, Jingwei Liu, Minkai Xu, Stefano Ermon, and CUI Bin. [n. d.]. Cross-Modal Contextualized Diffusion Models for Text-Guided Visual Generation and Editing. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_86_2","unstructured":"Yifan Yang Houwen Peng Yifei Shen Yuqing Yang Han Hu Lili Qiu Hideki Koike et\u00a0al. 2024. Imagebrush: Learning visual in-context instructions for exemplar-based image manipulation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_1_87_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Yang Zhen","unstructured":"Zhen Yang, Ganggui Ding, Wen Wang, Hao Chen, Bohan Zhuang, and Chunhua Shen. [n. d.]. Object-Aware Inversion and Reassembly for Image Editing. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_88_2","unstructured":"Kai Zhang Lingbo Mo Wenhu Chen Huan Sun and Yu Su. 2024. Magicbrush: A manually annotated dataset for instruction-guided image editing. NeurIPS 36 (2024)."},{"key":"e_1_3_3_1_89_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_1_90_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_3_1_91_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00584"},{"key":"e_1_3_3_1_92_2","unstructured":"Haozhe Zhao Xiaojian Ma Liang Chen Shuzheng Si Rujie Wu Kaikai An Peiyu Yu Minjia Zhang Qing Li and Baobao Chang. 2024. UltraEdit: Instruction-based Fine-Grained Image Editing at Scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.05282 (2024)."},{"key":"e_1_3_3_1_93_2","unstructured":"Min Zhao Fan Bao Chongxuan Li and Jun Zhu. 2022. Egsde: Unpaired image-to-image translation via energy-guided stochastic differential equations. Advances in Neural Information Processing Systems 35 (2022) 3609\u20133623."}],"event":{"name":"MMAsia '25: ACM Multimedia Asia","location":"Kuala Lumpur Malaysia","acronym":"MMAsia '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 7th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3743093.3770946","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:10:39Z","timestamp":1765008639000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3743093.3770946"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":92,"alternative-id":["10.1145\/3743093.3770946","10.1145\/3743093"],"URL":"https:\/\/doi.org\/10.1145\/3743093.3770946","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}