{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T17:18:17Z","timestamp":1777569497914,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key R&D Program of China","award":["2022ZD0161501"],"award-info":[{"award-number":["2022ZD0161501"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681079","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"3160-3169","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["TALE: Training-free Cross-domain Image Composition via Adaptive Latent Manipulation and Energy-guided Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4529-8584","authenticated-orcid":false,"given":"Kien T.","family":"Pham","sequence":"first","affiliation":[{"name":"Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2810-1035","authenticated-orcid":false,"given":"Jingye","family":"Chen","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2199-3948","authenticated-orcid":false,"given":"Qifeng","family":"Chen","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_2_2_2_1","volume-title":"Textdiffuser-2: Unleashing the power of language models for text rendering. arXiv preprint arXiv:2311.16465","author":"Chen Jingye","year":"2023","unstructured":"Jingye Chen, Yupan Huang, Tengchao Lv, Lei Cui, Qifeng Chen, and Furu Wei. 2023. Textdiffuser-2: Unleashing the power of language models for text rendering. arXiv preprint arXiv:2311.16465 (2023)."},{"key":"e_1_3_2_2_3_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Chen Jingye","year":"2024","unstructured":"Jingye Chen, Yupan Huang, Tengchao Lv, Lei Cui, Qifeng Chen, and Furu Wei. 2024. Textdiffuser: Diffusion models as text painters. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_4_1","unstructured":"Junsong Chen Jincheng Yu Chongjian Ge Lewei Yao Enze Xie Yue Wu Zhongdao Wang James Kwok Ping Luo Huchuan Lu et al. 2023. PixArt-a: Fast Training of Diffusion Transformer for Photorealistic Text-to-Image Synthesis. arXiv preprint arXiv:2310.00426 (2023)."},{"key":"e_1_3_2_2_5_1","volume-title":"Anydoor: Zero-shot object-level image customization. arXiv preprint arXiv:2307.09481","author":"Chen Xi","year":"2023","unstructured":"Xi Chen, Lianghua Huang, Yu Liu, Yujun Shen, Deli Zhao, and Hengshuang Zhao. 2023. Anydoor: Zero-shot object-level image customization. arXiv preprint arXiv:2307.09481 (2023)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00842"},{"key":"e_1_3_2_2_7_1","volume-title":"Wortman Vaughan (Eds.)","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion Models Beat GANs on Image Synthesis. In Advances in Neural Information Processing Systems, M. Ranzato, A. Beygelzimer, Y. Dauphin, P.S. Liang, and J. Wortman Vaughan (Eds.), Vol. 34. Curran Associates, Inc., 8780--8794. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf"},{"key":"e_1_3_2_2_8_1","volume-title":"International conference on machine learning. PMLR, 8489--8510","author":"Du Yilun","year":"2023","unstructured":"Yilun Du, Conor Durkan, Robin Strudel, Joshua B Tenenbaum, Sander Dieleman, Rob Fergus, Jascha Sohl-Dickstein, Arnaud Doucet, and Will Sussman Grathwohl. 2023. Reduce, reuse, recycle: Compositional generation with energy-based diffusion models and mcmc. In International conference on machine learning. PMLR, 8489--8510."},{"key":"e_1_3_2_2_9_1","volume-title":"Ying Nian Wu, and Diederik P Kingma","author":"Gao Ruiqi","year":"2020","unstructured":"Ruiqi Gao, Yang Song, Ben Poole, Ying Nian Wu, and Diederik P Kingma. 2020. Learning energy-based models by diffusion recovery likelihood. arXiv preprint arXiv:2012.08125 (2020)."},{"key":"e_1_3_2_2_10_1","volume-title":"7th International Conference on Learning Representations, ICLR 2019","author":"Geirhos Robert","year":"2019","unstructured":"Robert Geirhos, Patricia Rubisch, Claudio Michaelis, Matthias Bethge, Felix A. Wichmann, and Wieland Brendel. 2019. ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6--9, 2019. OpenReview.net. https:\/\/openreview.net\/forum?id=Bygh9j09KX"},{"key":"e_1_3_2_2_11_1","volume-title":"Generative adversarial nets. Advances in neural information processing systems","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01043"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01460"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01610"},{"key":"e_1_3_2_2_15_1","volume-title":"LLMs Meet Multimodal Generation and Editing: A Survey. arXiv preprint arXiv:2405","author":"He Yingqing","year":"2024","unstructured":"Yingqing He, Zhaoyang Liu, Jingye Chen, Zeyue Tian, Hongyu Liu, Xiaowei Chi, Runtao Liu, Ruibin Yuan, Yazhou Xing, Wenhai Wang, et al. 2024. LLMs Meet Multimodal Generation and Editing: A Survey. arXiv preprint arXiv:2405.19334 (2024)."},{"key":"e_1_3_2_2_16_1","volume-title":"Lin (Eds.)","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 6840--6851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_2_2_17_1","first-page":"8633","article-title":"Video diffusion models","volume":"35","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, Tim Salimans, Alexey Gritsenko, William Chan, Mohammad Norouzi, and David J Fleet. 2022. Video diffusion models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 8633--8646.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00525"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","unstructured":"Justin Johnson Alexandre Alahi and Li Fei-Fei. 2016. Perceptual Losses for Real-Time Style Transfer and Super-Resolution. arxiv: 1603.08155 [cs.CV]","DOI":"10.1007\/978-3-319-46475-6_43"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 14173--14182","author":"Ke Zhanghan","unstructured":"Zhanghan Ke, Yuhao Liu, Lei Zhu, Nanxuan Zhao, and Rynson W.H. Lau. 2023. Neural Preset for Color Style Transfer. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 14173--14182."},{"key":"e_1_3_2_2_22_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma Diederik P","year":"2013","unstructured":"Diederik P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_2_23_1","volume-title":"arXiv:2304.02643","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C. Berg, Wan-Yen Lo, Piotr Doll\u00e1r, and Ross Girshick. 2023. Segment Anything. arXiv:2304.02643 (2023)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01639"},{"key":"e_1_3_2_2_25_1","volume-title":"Marc Aurelio Ranzato, and Fu Jie Huang","author":"Lecun Yann","year":"2006","unstructured":"Yann Lecun, Sumit Chopra, Raia Hadsell, Marc Aurelio Ranzato, and Fu Jie Huang. 2006. A tutorial on energy-based learning. MIT Press."},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"19742","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models. In Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 19730--19742. https:\/\/proceedings.mlr.press\/v202\/li23q.html"},{"key":"e_1_3_2_2_27_1","volume-title":"Layerdiffusion: Layered controlled image editing with diffusion models. In SIGGRAPH Asia 2023 Technical Communications. 1--4.","author":"Li Pengzhi","year":"2023","unstructured":"Pengzhi Li, Qinxuan Huang, Yikang Ding, and Zhiheng Li. 2023. Layerdiffusion: Layered controlled image editing with diffusion models. In SIGGRAPH Asia 2023 Technical Communications. 1--4."},{"key":"e_1_3_2_2_28_1","volume-title":"Dreamedit: Subject-driven image editing. arXiv preprint arXiv:2306.12624","author":"Li Tianle","year":"2023","unstructured":"Tianle Li, Max Ku, Cong Wei, and Wenhu Chen. 2023. Dreamedit: Subject-driven image editing. arXiv preprint arXiv:2306.12624 (2023)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00924"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00037"},{"key":"e_1_3_2_2_31_1","volume-title":"Advances in Neural Information Processing Systems","author":"Lu Cheng","year":"2022","unstructured":"Cheng Lu, Yuhao Zhou, Fan Bao, Jianfei Chen, Chongxuan LI, and Jun Zhu. 2022. DPM-Solver: A Fast ODE Solver for Diffusion Probabilistic Model Sampling in Around 10 Steps. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 5775--5787. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/260a14acce2a89dad36adc8eefe7c59e-Paper-Conference.pdf"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00090"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00218"},{"key":"e_1_3_2_2_34_1","volume-title":"SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations. In International Conference on Learning Representations.","author":"Meng Chenlin","year":"2022","unstructured":"Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, and Stefano Ermon. 2022. SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_35_1","volume-title":"GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models. CoRR","author":"Nichol Alex","year":"2021","unstructured":"Alex Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob McGrew, Ilya Sutskever, and Mark Chen. 2021. GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models. CoRR, Vol. abs\/2112.10741 (2021). showeprint[arXiv]2112.10741 https:\/\/arxiv.org\/abs\/2112.10741"},{"key":"e_1_3_2_2_36_1","volume-title":"Making images real again: A comprehensive survey on deep image composition. arXiv preprint arXiv:2106.14490","author":"Niu Li","year":"2021","unstructured":"Li Niu, Wenyan Cong, Liu Liu, Yan Hong, Bo Zhang, Jing Liang, and Liqing Zhang. 2021. Making images real again: A comprehensive survey on deep image composition. arXiv preprint arXiv:2106.14490 (2021)."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_2_2_38_1","volume-title":"Levine (Eds.)","volume":"36","author":"Pierzchlewicz Pawel","year":"2023","unstructured":"Pawel Pierzchlewicz, Konstantin Willeke, Arne Nix, Pavithra Elumalai, Kelli Restivo, Tori Shinn, Cate Nealley, Gabrielle Rodriguez, Saumil Patel, Katrin Franke, Andreas Tolias, and Fabian Sinz. 2023. Energy Guided Diffusion for Generating Neurally Exciting Images. In Advances in Neural Information Processing Systems, A. Oh, T. Neumann, A. Globerson, K. Saenko, M. Hardt, and S. Levine (Eds.), Vol. 36. Curran Associates, Inc., 32574--32601. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/67226725b09ca9363637f63f85ed4bba-Paper-Conference.pdf"},{"key":"e_1_3_2_2_39_1","volume-title":"Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)."},{"key":"e_1_3_2_2_40_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8748--8763. https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_42_1","volume-title":"Burcu Karagol Ayan, Tim Salimans, et al.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in neural information processing systems, Vol. 35 (2022), 36479--36494."},{"key":"e_1_3_2_2_43_1","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning -","volume":"37","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric A. Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37 (Lille, France) (ICML'15). JMLR.org, 2256--2265."},{"key":"e_1_3_2_2_44_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_2_45_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=PxTIG12RRHS","author":"Song Yang","year":"2021","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2021. Score-Based Generative Modeling through Stochastic Differential Equations. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=PxTIG12RRHS"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01756"},{"key":"e_1_3_2_2_47_1","volume-title":"He Zhang, Wei Xiong, and Daniel Aliaga.","author":"Song Yizhi","year":"2024","unstructured":"Yizhi Song, Zhifei Zhang, Zhe Lin, Scott Cohen, Brian Price, Jianming Zhang, Soo Ye Kim, He Zhang, Wei Xiong, and Daniel Aliaga. 2024. IMPRINT: Generative Object Compositing by Learning Identity-Preserving Representation. arXiv preprint arXiv:2403.10701 (2024)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3186344"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/1778765.1778862"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.299"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350944"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"crossref","unstructured":"Yazhou Xing Yingqing He Zeyue Tian Xintao Wang and Qifeng Chen. 2024. Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners. arxiv: 2402.17723 [cs.CV]","DOI":"10.1109\/CVPR52733.2024.00683"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02118"},{"key":"e_1_3_2_2_55_1","volume-title":"Inpaint Anything: Segment Anything Meets Image Inpainting. arXiv preprint arXiv:2304.06790","author":"Yu Tao","year":"2023","unstructured":"Tao Yu, Runseng Feng, Ruoyu Feng, Jinming Liu, Xin Jin, Wenjun Zeng, and Zhibo Chen. 2023. Inpaint Anything: Segment Anything Meets Image Inpainting. arXiv preprint arXiv:2304.06790 (2023)."},{"key":"e_1_3_2_2_56_1","volume-title":"Controlcom: Controllable image composition using diffusion model. arXiv preprint arXiv:2308.10040","author":"Zhang Bo","year":"2023","unstructured":"Bo Zhang, Yuxuan Duan, Jun Lan, Yan Hong, Huijia Zhu, Weiqiang Wang, and Li Niu. 2023. Controlcom: Controllable image composition using diffusion model. arXiv preprint arXiv:2308.10040 (2023)."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093632"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_2_60_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Zhao Min","year":"2022","unstructured":"Min Zhao, Fan Bao, Chongxuan Li, and Jun Zhu. 2022. EGSDE: Unpaired Image-to-Image Translation via Energy-Guided Stochastic Differential Equations. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022, Sanmi Koyejo, S. Mohamed, A. Agarwal, Danielle Belgrave, K. Cho, and A. Oh (Eds.). http:\/\/papers.nips.cc\/paper_files\/paper\/2022\/hash\/177d68f4adef163b7b123b5c5adb3c60-Abstract-Conference.html"},{"key":"e_1_3_2_2_61_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Zhao Shihao","year":"2024","unstructured":"Shihao Zhao, Dongdong Chen, Yen-Chun Chen, Jianmin Bao, Shaozhe Hao, Lu Yuan, and Kwan-Yee K Wong. 2024. Uni-controlnet: All-in-one control to text-to-image diffusion models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681079","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681079","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:52Z","timestamp":1750294672000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681079"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":61,"alternative-id":["10.1145\/3664647.3681079","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681079","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}