{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:57:50Z","timestamp":1774022270313,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","funder":[{"name":"Guangzhou Basic Research Scheme","award":["2024A04J4229"],"award-info":[{"award-number":["2024A04J4229"]}]},{"name":"Guangzhou Industrial Information and Intelligent Key Laboratory Project","award":["2024A03J0628"],"award-info":[{"award-number":["2024A03J0628"]}]},{"name":"Research Grants Council of Hong Kong","award":["C5055-24G and T45-401\/22-N"],"award-info":[{"award-number":["C5055-24G and T45-401\/22-N"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62201483"],"award-info":[{"award-number":["62201483"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730731","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["AssetDropper: Asset Extraction via Diffusion Models with Reward-Driven Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6738-7066","authenticated-orcid":false,"given":"Lanjiong","family":"Li","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2620-9937","authenticated-orcid":false,"given":"Guanhua","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Electronic and Computer Engineering, Peking University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1478-3232","authenticated-orcid":false,"given":"Lingting","family":"Zhu","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5422-4044","authenticated-orcid":false,"given":"Zeyu","family":"Cai","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9315-6527","authenticated-orcid":false,"given":"Lequan","family":"Yu","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5486-3125","authenticated-orcid":false,"given":"Jian","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Electronic and Computer Engineering, Peking University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5374-6330","authenticated-orcid":false,"given":"Zeyu","family":"Wang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China and The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_27"},{"key":"e_1_3_3_2_3_1","unstructured":"Miko\u0142aj Bi\u0144kowski Danica\u00a0J Sutherland Michael Arbel and Arthur Gretton. 2018. Demystifying MMD Gans. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1801.01401 (2018)."},{"key":"e_1_3_3_2_4_1","unstructured":"Kevin Black Michael Janner Yilun Du Ilya Kostrikov and Sergey Levine. 2023a. Training Diffusion Models with Reinforcement Learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.13301 (2023)."},{"key":"e_1_3_3_2_5_1","unstructured":"Kevin Black Michael Janner Yilun Du Ilya Kostrikov and Sergey Levine. 2023b. Training Diffusion Models with Reinforcement Learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.13301 (2023)."},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00526"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00630"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01391"},{"key":"e_1_3_3_2_10_1","unstructured":"Yisol Choi Sangkyung Kwak Kyungmin Lee Hyungwon Choi and Jinwoo Shin. 2024. Improving Diffusion Models for Authentic Virtual Try-on in the Wild. arxiv:https:\/\/arXiv.org\/abs\/2403.05139\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.05139"},{"key":"e_1_3_3_2_11_1","unstructured":"Rinon Gal Yuval Alaluf Yuval Atzmon Or Patashnik Amit\u00a0H Bermano Gal Chechik and Daniel Cohen-Or. 2022. An Image Is Worth One Word: Personalizing Text-to-Image Generation Using Textual Inversion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2208.01618 (2022)."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00838"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00457"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295408"},{"key":"e_1_3_3_2_15_1","series-title":"(NIPS \u201920)","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Proceedings of the 34th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS \u201920). Curran Associates Inc., Red Hook, NY, USA, Article 574, 12\u00a0pages."},{"key":"e_1_3_3_2_16_1","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-Free Diffusion Guidance. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.12598 (2022)."},{"key":"e_1_3_3_2_17_1","unstructured":"Zehuan Huang Yuan-Chen Guo Haoran Wang Ran Yi Lizhuang Ma Yan-Pei Cao and Lu Sheng. 2024. MV-Adapter: Multi-View Consistent Image Generation Made Easy. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.03632 (2024)."},{"key":"e_1_3_3_2_18_1","volume-title":"OpenCLIP","author":"Ilharco Gabriel","year":"2021","unstructured":"Gabriel Ilharco, Mitchell Wortsman, Nicholas Carlini, Rohan Taori, Achal Dave, Vaishaal Shankar, Hongseok Namkoong, John Miller, Hannaneh Hajishirzi, Ali Farhadi, and Ludwig Schmidt. 2021. OpenCLIP. https:\/\/doi.org\/10.5281\/zenodo.5143773"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_37"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00781"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_3_2_23_1","unstructured":"Black\u00a0Forest Labs. 2023. FLUX. https:\/\/github.com\/black-forest-labs\/flux."},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_13"},{"key":"e_1_3_3_2_25_1","first-page":"129","volume-title":"Computer Vision \u2013 ECCV 2024: 18th European Conference, Milan, Italy, September 29\u2013October 4, 2024, Proceedings, Part VII","author":"Li Ming","year":"2024","unstructured":"Ming Li, Taojiannan Yang, Huafeng Kuang, Jie Wu, Zhaoning Wang, Xuefeng Xiao, and Chen Chen. 2024. ControlNet++: Improving Conditional Controls with Efficient Consistency Feedback. In Computer Vision \u2013 ECCV 2024: 18th European Conference, Milan, Italy, September 29\u2013October 4, 2024, Proceedings, Part VII (Milan, Italy). Springer-Verlag, Berlin, Heidelberg, 129\u2013147. https:\/\/doi.org\/10.1007\/978-3-031-72667-58"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01835"},{"key":"e_1_3_3_2_27_1","series-title":"(NIPS \u201923)","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Lim Sungbin","year":"2023","unstructured":"Sungbin Lim, Eunbi Yoon, Taehyun Byun, Taewon Kang, Seungwoo Kim, Kyungjae Lee, and Sungjoon Choi. 2023. Score-Based Generative Modeling Through Stochastic Differential Equations. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS \u201923). Curran Associates Inc., Red Hook, NY, USA, Article 1645, 14\u00a0pages."},{"key":"e_1_3_3_2_28_1","unstructured":"Weifeng Lin Xinyu Wei Renrui Zhang Le Zhuo Shitian Zhao Siyuan Huang Junlin Xie Yu Qiao Peng Gao and Hongsheng Li. 2024. PixWizard: Versatile Image-to-Image Visual Assistant with Open-Language Instructions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.15278 (2024)."},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00592"},{"key":"e_1_3_3_2_30_1","first-page":"38","volume-title":"Computer Vision \u2013 ECCV 2024: 18th European Conference, Milan, Italy, September 29\u2013October 4, 2024, Proceedings, Part XLVII","author":"Liu Shilong","year":"2024","unstructured":"Shilong Liu, Zhaoyang Zeng, Tianhe Ren, Feng Li, Hao Zhang, Jie Yang, Qing Jiang, Chunyuan Li, Jianwei Yang, Hang Su, Jun Zhu, and Lei Zhang. 2024b. Grounding DINO: Marrying DINO with Grounded Pre-training for Open-Set Object Detection. In Computer Vision \u2013 ECCV 2024: 18th European Conference, Milan, Italy, September 29\u2013October 4, 2024, Proceedings, Part XLVII (Milan, Italy). Springer-Verlag, Berlin, Heidelberg, 38\u201355. https:\/\/doi.org\/10.1007\/978-3-031-72970-63"},{"key":"e_1_3_3_2_31_1","first-page":"1","volume-title":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Liu Tao","year":"2025","unstructured":"Tao Liu, Huafeng Kuang, and Xianming Lin. 2025. Aligning Text-to-Image Models Using Human Feedback. In ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49660.2025.10888279"},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612137"},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_3_2_34_1","unstructured":"OpenAI. 2024. Hello GPT-4o. https:\/\/openai.com\/index\/hello-gpt-4o\/ 2024a. (2024)."},{"key":"e_1_3_3_2_35_1","unstructured":"Dustin Podell Zion English Kyle Lacey Andreas Blattmann Tim Dockhorn Jonas M\u00fcller Joe Penna and Robin Rombach. 2023. SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.01952 (2023)."},{"key":"e_1_3_3_2_36_1","unstructured":"Mihir Prabhudesai Anirudh Goyal Deepak Pathak and Katerina Fragkiadaki. 2024. Aligning Text-to-Image Diffusion Models with Reward Backpropagation. arxiv:https:\/\/arXiv.org\/abs\/2310.03739\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2310.03739"},{"key":"e_1_3_3_2_37_1","series-title":"(NIPS \u201923)","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Qin Can","year":"2023","unstructured":"Can Qin, Shu Zhang, Ning Yu, Yihao Feng, Xinyi Yang, Yingbo Zhou, Huan Wang, Juan\u00a0Carlos Niebles, Caiming Xiong, Silvio Savarese, Stefano Ermon, Yun Fu, and Ran Xu. 2023. Unicontrol: A Unified Diffusion Model for Controllable Visual Generation in the Wild. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS \u201923). Curran Associates Inc., Red Hook, NY, USA, Article 1862, 32\u00a0pages."},{"key":"e_1_3_3_2_38_1","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning Transferable Visual Models from Natural Language Supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_40_1","unstructured":"Ruoxi Shi Hansheng Chen Zhuoyang Zhang Minghua Liu Chao Xu Xinyue Wei Linghao Chen Chong Zeng and Hao Su. 2023. Zero123++: A Single Image to Consistent Multi-View Diffusion Base Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.15110 (2023)."},{"key":"e_1_3_3_2_41_1","series-title":"(ICML\u201915)","first-page":"2256","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric\u00a0A. Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep Unsupervised Learning Using Nonequilibrium Thermodynamics. In Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37 (Lille, France) (ICML\u201915). JMLR.org, 2256\u20132265."},{"key":"e_1_3_3_2_42_1","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2020. Denoising Diffusion Implicit Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.02502 (2020)."},{"key":"e_1_3_3_2_43_1","volume-title":"Generative Modeling by Estimating Gradients of the Data Distribution","author":"Song Yang","year":"2019","unstructured":"Yang Song and Stefano Ermon. 2019. Generative Modeling by Estimating Gradients of the Data Distribution. Curran Associates Inc., Red Hook, NY, USA."},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"crossref","unstructured":"Yoad Tewel Omri Kaduri Rinon Gal Yoni Kasten Lior Wolf Gal Chechik and Yuval Atzmon. 2024. Training-Free Consistent Text-to-Image Generation. ACM Trans. Graph. 43 4 Article 52 (July 2024) 18\u00a0pages. https:\/\/doi.org\/10.1145\/3658157","DOI":"10.1145\/3658157"},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00596"},{"key":"e_1_3_3_2_46_1","unstructured":"Xiaoshi Wu Yiming Hao Keqiang Sun Yixiong Chen Feng Zhu Rui Zhao and Hongsheng Li. 2023. Human Preference Score v2: A Solid Benchmark for Evaluating Human Preferences of Text-to-Image Synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.09341 (2023)."},{"key":"e_1_3_3_2_47_1","unstructured":"Shitao Xiao Yueze Wang Junjie Zhou Huaying Yuan Xingrun Xing Ruiran Yan Shuting Wang Tiejun Huang and Zheng Liu. 2024. OmniGen: Unified Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.11340 (2024)."},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02255"},{"key":"e_1_3_3_2_49_1","series-title":"(NIPS \u201923)","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Xu Jiazheng","year":"2023","unstructured":"Jiazheng Xu, Xiao Liu, Yuchen Wu, Yuxuan Tong, Qinkai Li, Ming Ding, Jie Tang, and Yuxiao Dong. 2023. ImageReward: Learning and Evaluating Human Preferences for Text-to-Image Generation. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS \u201923). Curran Associates Inc., Red Hook, NY, USA, Article 700, 33\u00a0pages."},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_3_2_51_1","unstructured":"Hu Ye Jun Zhang Sibo Liu Xiao Han and Wei Yang. 2023. Ip-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.06721 (2023)."},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_53_1","unstructured":"Tianjun Zhang Yi Zhang Vibhav Vineet Neel Joshi and Xin Wang. 2023b. Controllable Text-to-Image Generation with GPT-4. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.18583 (2023)."},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_18"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00447"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730731","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:58:53Z","timestamp":1774018733000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730731"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":54,"alternative-id":["10.1145\/3721238.3730731","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730731","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}