{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T01:20:01Z","timestamp":1775870401385,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":78,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,13]],"date-time":"2024-07-13T00:00:00Z","timestamp":1720828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["226-2023-00145"],"award-info":[{"award-number":["226-2023-00145"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Information Technology Center, Zhejiang University"},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1909028"],"award-info":[{"award-number":["IIS-1909028"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62332015, 62227806"],"award-info":[{"award-number":["62332015, 62227806"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,13]]},"DOI":"10.1145\/3641519.3657396","type":"proceedings-article","created":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T10:39:28Z","timestamp":1720780768000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":40,"title":["DiLightNet: Fine-grained Lighting Control for Diffusion-based Image Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-6373-6848","authenticated-orcid":false,"given":"Chong","family":"Zeng","sequence":"first","affiliation":[{"name":"State Key Lab of CAD and CG, Zhejiang University, China and Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0362-337X","authenticated-orcid":false,"given":"Yue","family":"Dong","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7621-9808","authenticated-orcid":false,"given":"Pieter","family":"Peers","sequence":"additional","affiliation":[{"name":"College of William &amp; Mary, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8087-9649","authenticated-orcid":false,"given":"Youkang","family":"Kong","sequence":"additional","affiliation":[{"name":"Tsinghua University, China and Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4404-2275","authenticated-orcid":false,"given":"Hongzhi","family":"Wu","sequence":"additional","affiliation":[{"name":"State Key Lab of CAD and CG, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8788-2453","authenticated-orcid":false,"given":"Xin","family":"Tong","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,13]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Omri Avrahami Dani Lischinski and Ohad Fried. 2022. Blended diffusion for text-driven editing of natural images. In CVPR. 18208\u201318218.","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_2_2_2_1","volume-title":"Lasagna: Layered Score Distillation for Disentangled Object Relighting.","author":"Bashkirova Dina","year":"2023","unstructured":"Dina Bashkirova, Arijit Ray, Rupayan Mallick, Sarah\u00a0Adel Bargal, Jianming Zhang, Ranjay Krishna, and Kate Saenko. 2023. Lasagna: Layered Score Distillation for Disentangled Object Relighting."},{"key":"e_1_3_2_2_3_1","volume-title":"Zoedepth: Zero-shot transfer by combining relative and metric depth. arXiv preprint arXiv:2302.12288","author":"Bhat Shariq\u00a0Farooq","year":"2023","unstructured":"Shariq\u00a0Farooq Bhat, Reiner Birkl, Diana Wofk, Peter Wonka, and Matthias M\u00fcller. 2023. Zoedepth: Zero-shot transfer by combining relative and metric depth. arXiv preprint arXiv:2302.12288 (2023)."},{"key":"e_1_3_2_2_4_1","unstructured":"Blender Foundation. 2011. Blender Cycles. https:\/\/github.com\/blender\/cycles."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Tim Brooks Aleksander Holynski and Alexei\u00a0A Efros. 2023. Instructpix2pix: Learning to follow image editing instructions. In CVPR. 18392\u201318402.","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_2_6_1","unstructured":"Brent Burley. 2012. Physically-based shading at disney. In ACM Siggraph Courses Vol.\u00a02012."},{"key":"e_1_3_2_2_7_1","volume-title":"MasaCtrl: Tuning-Free Mutual Self-Attention Control for Consistent Image Synthesis and Editing. arXiv preprint arXiv:2304.08465","author":"Cao Mingdeng","year":"2023","unstructured":"Mingdeng Cao, Xintao Wang, Zhongang Qi, Ying Shan, Xiaohu Qie, and Yinqiang Zheng. 2023. MasaCtrl: Tuning-Free Mutual Self-Attention Control for Consistent Image Synthesis and Editing. arXiv preprint arXiv:2304.08465 (2023)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Rui Chen Yongwei Chen Ningxin Jiao and Kui Jia. 2023. Fantasia3D: Disentangling Geometry and Appearance for High-quality Text-to-3D Content Creation. In ICCV.","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"e_1_3_2_2_9_1","volume-title":"Attribute-centric compositional text-to-image generation. arXiv preprint arXiv:2301.01413","author":"Cong Yuren","year":"2023","unstructured":"Yuren Cong, Martin\u00a0Renqiang Min, Li\u00a0Erran Li, Bodo Rosenhahn, and Michael\u00a0Ying Yang. 2023. Attribute-centric compositional text-to-image generation. arXiv preprint arXiv:2301.01413 (2023)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/280814.280864"},{"key":"e_1_3_2_2_11_1","volume-title":"Objaverse: A Universe of Annotated 3D Objects. arXiv preprint arXiv:2212.08051","author":"Deitke Matt","year":"2022","unstructured":"Matt Deitke, Dustin Schwenk, Jordi Salvador, Luca Weihs, Oscar Michel, Eli VanderBilt, Ludwig Schmidt, Kiana Ehsani, Aniruddha Kembhavi, and Ali Farhadi. 2022. Objaverse: A Universe of Annotated 3D Objects. arXiv preprint arXiv:2212.08051 (2022)."},{"key":"e_1_3_2_2_12_1","volume-title":"Arcface: Additive angular margin loss for deep face recognition. In CVPR. 4690\u20134699.","author":"Deng Jiankang","year":"2019","unstructured":"Jiankang Deng, Jia Guo, Niannan Xue, and Stefanos Zafeiriou. 2019. Arcface: Additive angular margin loss for deep face recognition. In CVPR. 4690\u20134699."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"crossref","unstructured":"Valentin Deschaintre George Drettakis and Adrien Bousseau. 2020. Guided fine-tuning for large-scale material transfer. In Comp. Graph. Forum Vol.\u00a039. 91\u2013105.","DOI":"10.1111\/cgf.14056"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Zheng Ding Xuaner Zhang Zhihao Xia Lars Jebe Zhuowen Tu and Xiuming Zhang. 2023. DiffusionRig: Learning Personalized Priors for Facial Appearance Editing. In CVPR. 12736\u201312746.","DOI":"10.1109\/CVPR52729.2023.01225"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459936"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417767"},{"key":"e_1_3_2_2_17_1","unstructured":"Songwei Ge Taesung Park Jun-Yan Zhu and Jia-Bin Huang. 2023. Expressive text-to-image generation with rich text. In CVPR. 7545\u20137556."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14467"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Yuxuan Han Zhibo Wang and Feng Xu. 2023. Learning a 3D Morphable Face Reflectance Model From Low-Cost Data. In CVPR. 8598\u20138608.","DOI":"10.1109\/CVPR52729.2023.00831"},{"key":"e_1_3_2_2_20_1","volume-title":"Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)."},{"key":"e_1_3_2_2_21_1","unstructured":"Jonathan Ho and Tim Salimans. 2021. Classifier-Free Diffusion Guidance. In NeurIPS."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"Chaonan Ji Tao Yu Kaiwen Guo Jingxin Liu and Yebin Liu. 2022. Geometry-Aware Single-Image Full-Body Human Relighting. In ECCV. 388\u2013405.","DOI":"10.1007\/978-3-031-19787-1_22"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3272127.3275104"},{"key":"e_1_3_2_2_24_1","unstructured":"Tero Karras Miika Aittala Timo Aila and Samuli Laine. 2022. Elucidating the Design Space of Diffusion-Based Generative Models. In NeurIPS."},{"key":"e_1_3_2_2_25_1","volume-title":"Imagic: Text-based real image editing with diffusion models. In CVPR. 6007\u20136017.","author":"Kawar Bahjat","year":"2023","unstructured":"Bahjat Kawar, Shiran Zada, Oran Lang, Omer Tov, Huiwen Chang, Tali Dekel, Inbar Mosseri, and Michal Irani. 2023. Imagic: Text-based real image editing with diffusion models. In CVPR. 6007\u20136017."},{"key":"e_1_3_2_2_26_1","volume-title":"Diffusionclip: Text-guided diffusion models for robust image manipulation. In CVPR. 2426\u20132435.","author":"Kim Gwanghyun","year":"2022","unstructured":"Gwanghyun Kim, Taesung Kwon, and Jong\u00a0Chul Ye. 2022. Diffusionclip: Text-guided diffusion models for robust image manipulation. In CVPR. 2426\u20132435."},{"key":"e_1_3_2_2_27_1","volume-title":"Piotr Dollar, and Ross Girshick","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander\u00a0C. Berg, Wan-Yen Lo, Piotr Dollar, and Ross Girshick. 2023. Segment Anything. In ICCV. 4015\u20134026."},{"key":"e_1_3_2_2_28_1","volume-title":"Intrinsic Image Diffusion for Single-view Material Estimation. arXiv preprint arXiv:2312.12274","author":"Kocsis Peter","year":"2023","unstructured":"Peter Kocsis, Vincent Sitzmann, and Matthias Nie\u00dfner. 2023. Intrinsic Image Diffusion for Single-view Material Estimation. arXiv preprint arXiv:2312.12274 (2023)."},{"key":"e_1_3_2_2_29_1","unstructured":"Manuel Lagunas Xin Sun Jimei Yang Ruben Villegas Jianming Zhang Zhixin Shu Belen Masia and Diego Gutierrez. 2021. Single-image Full-body Human Relighting. In EGSR - DL-only Track."},{"key":"e_1_3_2_2_30_1","volume-title":"Learning to factorize and relight a city","author":"Liu Andrew","unstructured":"Andrew Liu, Shiry Ginosar, Tinghui Zhou, Alexei\u00a0A Efros, and Noah Snavely. 2020a. Learning to factorize and relight a city. In ECCV. Springer, 544\u2013561."},{"key":"e_1_3_2_2_31_1","unstructured":"Ruoshi Liu Rundi Wu Basile Van\u00a0Hoorick Pavel Tokmakov Sergey Zakharov and Carl Vondrick. 2023. Zero-1-to-3: Zero-shot one image to 3d object. In ICCV. 9298\u20139309."},{"key":"e_1_3_2_2_32_1","volume-title":"Open-edit: Open-domain image manipulation with open-vocabulary instructions","author":"Liu Xihui","year":"2020","unstructured":"Xihui Liu, Zhe Lin, Jianming Zhang, Handong Zhao, Quan Tran, Xiaogang Wang, and Hongsheng Li. 2020b. Open-edit: Open-domain image manipulation with open-vocabulary instructions. In ECCV. Springer, 89\u2013106."},{"key":"e_1_3_2_2_33_1","unstructured":"Ilya Loshchilov and Frank Hutter. 2018. Decoupled Weight Decay Regularization. In ICLR."},{"key":"e_1_3_2_2_34_1","volume-title":"arXiv preprint arXiv:2307.11410","author":"Ma Jian","year":"2023","unstructured":"Jian Ma, Junhao Liang, Chen Chen, and Haonan Lu. 2023. Subject-Diffusion:Open Domain Personalized Text-to-Image Generation without Test-time Fine-tuning. arXiv preprint arXiv:2307.11410 (2023)."},{"key":"e_1_3_2_2_35_1","unstructured":"Chenlin Meng Yutong He Yang Song Jiaming Song Jiajun Wu Jun-Yan Zhu and Stefano Ermon. 2022. SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations. In ICLR."},{"key":"e_1_3_2_2_36_1","volume-title":"NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis. ECCV","author":"Mildenhall Ben","year":"2020","unstructured":"Ben Mildenhall, Pratul\u00a0P Srinivasan, Matthew Tancik, Jonathan\u00a0T Barron, Ravi Ramamoorthi, and Ren Ng. 2020. NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis. ECCV (2020), 405\u2013421."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"crossref","unstructured":"Ron Mokady Amir Hertz Kfir Aberman Yael Pritch and Daniel Cohen-Or. 2023. Null-text inversion for editing real images using guided diffusion models. In CVPR. 6038\u20136047.","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"e_1_3_2_2_38_1","volume-title":"T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453","author":"Mou Chong","year":"2023","unstructured":"Chong Mou, Xintao Wang, Liangbin Xie, Yanze Wu, Jian Zhang, Zhongang Qi, Ying Shan, and Xiaohu Qie. 2023. T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"crossref","unstructured":"Thomas Nestmeyer Jean-Fran\u00e7ois Lalonde Iain Matthews and Andreas Lehrmann. 2020. Learning physics-guided face relighting under directional light. In CVPR. 5124\u20135133.","DOI":"10.1109\/CVPR42600.2020.00517"},{"key":"e_1_3_2_2_40_1","volume-title":"GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models. In ICML. 16784\u201316804.","author":"Nichol Alexander\u00a0Quinn","year":"2022","unstructured":"Alexander\u00a0Quinn Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob Mcgrew, Ilya Sutskever, and Mark Chen. 2022. GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models. In ICML. 16784\u201316804."},{"key":"e_1_3_2_2_41_1","volume-title":"Teaching clip to count to ten. arXiv preprint arXiv:2302.12066","author":"Paiss Roni","year":"2023","unstructured":"Roni Paiss, Ariel Ephrat, Omer Tov, Shiran Zada, Inbar Mosseri, Michal Irani, and Tali Dekel. 2023. Teaching clip to count to ten. arXiv preprint arXiv:2302.12066 (2023)."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459872"},{"key":"e_1_3_2_2_43_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. NeurIPS 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, 2019. Pytorch: An imperative style, high-performance deep learning library. NeurIPS 32 (2019)."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/1276377.1276442"},{"key":"e_1_3_2_2_45_1","volume-title":"DiFaReli: Diffusion Face Relighting. arXiv preprint arXiv:2304.09479","author":"Ponglertnapakorn Puntawat","year":"2023","unstructured":"Puntawat Ponglertnapakorn, Nontawat Tritrong, and Supasorn Suwajanakorn. 2023. DiFaReli: Diffusion Face Relighting. arXiv preprint arXiv:2304.09479 (2023)."},{"key":"e_1_3_2_2_46_1","volume-title":"U2-Net: Going deeper with nested U-structure for salient object detection. Pattern recognition 106","author":"Qin Xuebin","year":"2020","unstructured":"Xuebin Qin, Zichen Zhang, Chenyang Huang, Masood Dehghan, Osmar\u00a0R Zaiane, and Martin Jagersand. 2020. U2-Net: Going deeper with nested U-structure for salient object detection. Pattern recognition 106 (2020), 107404."},{"key":"e_1_3_2_2_47_1","volume-title":"A signal-processing framework for forward and inverse rendering","author":"Ramamoorthi Ravi","unstructured":"Ravi Ramamoorthi. 2002. A signal-processing framework for forward and inverse rendering. Stanford University."},{"key":"e_1_3_2_2_48_1","volume-title":"Hierarchical Text-Conditional Image Generation with CLIP Latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"crossref","unstructured":"Anurag Ranjan Kwang\u00a0Moo Yi Jen-Hao\u00a0Rick Chang and Oncel Tuzel. 2023. FaceLit: Neural 3D Relightable Faces. In CVPR. 8619\u20138628.","DOI":"10.1109\/CVPR52729.2023.00833"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis With Latent Diffusion Models. In CVPR. 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_51_1","volume-title":"Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In CVPR. 22500\u201322510.","author":"Ruiz Nataniel","year":"2023","unstructured":"Nataniel Ruiz, Yuanzhen Li, Varun Jampani, Yael Pritch, Michael Rubinstein, and Kfir Aberman. 2023a. Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In CVPR. 22500\u201322510."},{"key":"e_1_3_2_2_52_1","volume-title":"HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models. arXiv preprint arXiv:2307.06949","author":"Ruiz Nataniel","year":"2023","unstructured":"Nataniel Ruiz, Yuanzhen Li, Varun Jampani, Wei Wei, Tingbo Hou, Yael Pritch, Neal Wadhwa, Michael Rubinstein, and Kfir Aberman. 2023b. HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models. arXiv preprint arXiv:2307.06949 (2023)."},{"key":"e_1_3_2_2_53_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily\u00a0L Denton, Kamyar Ghasemipour, Raphael Gontijo\u00a0Lopes, Burcu Karagol\u00a0Ayan, Tim Salimans, 2022. Photorealistic text-to-image diffusion models with deep language understanding. NeurIPS 35 (2022), 36479\u201336494.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_54_1","volume-title":"MatFusion: A Generative Diffusion Model for SVBRDF Capture. In SIGGRAPH Asia 2023 Conference Papers. 1\u201310","author":"Sartor Sam","year":"2023","unstructured":"Sam Sartor and Pieter Peers. 2023. MatFusion: A Generative Diffusion Model for SVBRDF Capture. In SIGGRAPH Asia 2023 Conference Papers. 1\u201310."},{"key":"e_1_3_2_2_55_1","volume-title":"Alchemist: Parametric Control of Material Properties with Diffusion Models. arXiv preprint arXiv:2312.02970","author":"Sharma Prafull","year":"2023","unstructured":"Prafull Sharma, Varun Jampani, Yuanzhen Li, Xuhui Jia, Dmitry Lagun, Fredo Durand, William\u00a0T. Freeman, and Mark Matthews. 2023. Alchemist: Parametric Control of Material Properties with Diffusion Models. arXiv preprint arXiv:2312.02970 (2023)."},{"key":"e_1_3_2_2_56_1","unstructured":"Zhixin Shu Ersin Yumer Sunil Hadap Kalyan Sunkavalli Eli Shechtman and Dimitris Samaras. 2017. Neural face editing with intrinsic image disentangling. In CVPR. 5541\u20135550."},{"key":"e_1_3_2_2_57_1","unstructured":"Yang Song Jascha Sohl-Dickstein Diederik\u00a0P Kingma Abhishek Kumar Stefano Ermon and Ben Poole. 2021. Score-Based Generative Modeling through Stochastic Differential Equations. In ICLR."},{"key":"e_1_3_2_2_58_1","unstructured":"Stability AI. 2022a. Stable Diffusion V2 - Inpainting. https:\/\/huggingface.co\/stabilityai\/stable-diffusion-2-inpainting."},{"key":"e_1_3_2_2_59_1","unstructured":"Stability AI. 2022b. Stable Diffusion V2.1. https:\/\/huggingface.co\/stabilityai\/stable-diffusion-2-1."},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3323008"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"crossref","unstructured":"Narek Tumanyan Michal Geyer Shai Bagon and Tali Dekel. 2023. Plug-and-Play Diffusion Features for Text-Driven Image-to-Image Translation. In CVPR. 1921\u20131930.","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"crossref","unstructured":"Murat T\u00fcre Mustafa\u00a0Ege \u00c7\u0131klabakkal Aykut Erdem Erkut Erdem Pinar Sat\u0131lm\u0131\u015f and Ahmet\u00a0Oguz Aky\u00fcz. 2021. From Noon to Sunset: Interactive Rendering Relighting and Recolouring of Landscape Photographs by Modifying Solar Position. In Comp. Graph. Forum Vol.\u00a040. 500\u2013515.","DOI":"10.1111\/cgf.14392"},{"key":"e_1_3_2_2_63_1","volume-title":"ControlMat: A Controlled Generative Approach to Material Capture. arXiv preprint arXiv:2309.01700","author":"Vecchio Giuseppe","year":"2023","unstructured":"Giuseppe Vecchio, Rosalie Martin, Arthur Roullier, Adrien Kaiser, Romain Rouffet, Valentin Deschaintre, and Tamy Boubekeur. 2023. ControlMat: A Controlled Generative Approach to Material Capture. arXiv preprint arXiv:2309.01700 (2023)."},{"key":"e_1_3_2_2_64_1","volume-title":"Sketch-Guided Text-to-Image Diffusion Models. In ACM SIGGRAPH 2023 Conference Proceedings. Article 55","author":"Voynov Andrey","year":"2023","unstructured":"Andrey Voynov, Kfir Aberman, and Daniel Cohen-Or. 2023a. Sketch-Guided Text-to-Image Diffusion Models. In ACM SIGGRAPH 2023 Conference Proceedings. Article 55, 11\u00a0pages."},{"key":"e_1_3_2_2_65_1","volume-title":"Extended Textual Conditioning in Text-to-Image Generation. arXiv preprint arXiv:2303.09522","author":"Voynov Andrey","year":"2023","unstructured":"Andrey Voynov, Qinghao Chu, Daniel Cohen-Or, and Kfir Aberman. 2023b. P+: Extended Textual Conditioning in Text-to-Image Generation. arXiv preprint arXiv:2303.09522 (2023)."},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.244"},{"key":"e_1_3_2_2_67_1","volume-title":"Novel view synthesis with diffusion models. arXiv preprint arXiv:2210.04628","author":"Watson Daniel","year":"2022","unstructured":"Daniel Watson, William Chan, Ricardo Martin-Brualla, Jonathan Ho, Andrea Tagliasacchi, and Mohammad Norouzi. 2022. Novel view synthesis with diffusion models. arXiv preprint arXiv:2210.04628 (2022)."},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3034185"},{"key":"e_1_3_2_2_69_1","volume-title":"3D-aware Image Generation using 2D Diffusion Models. arXiv preprint arXiv:2303.17905","author":"Xiang Jianfeng","year":"2023","unstructured":"Jianfeng Xiang, Jiaolong Yang, Binbin Huang, and Xin Tong. 2023. 3D-aware Image Generation using 2D Diffusion Models. arXiv preprint arXiv:2303.17905 (2023)."},{"key":"e_1_3_2_2_70_1","volume-title":"FastComposer: Tuning-Free Multi-Subject Image Generation with Localized Attention. arXiv preprint arXiv:2305.10431","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Tianwei Yin, William\u00a0T. Freeman, Fr\u00e9do Durand, and Song Han. 2023. FastComposer: Tuning-Free Multi-Subject Image Generation with Localized Attention. arXiv preprint arXiv:2305.10431 (2023)."},{"key":"e_1_3_2_2_71_1","volume-title":"Matlaber: Material-aware text-to-3d via latent brdf auto-encoder. arXiv preprint arXiv:2308.09278","author":"Xu Xudong","year":"2023","unstructured":"Xudong Xu, Zhaoyang Lyu, Xingang Pan, and Bo Dai. 2023. Matlaber: Material-aware text-to-3d via latent brdf auto-encoder. arXiv preprint arXiv:2308.09278 (2023)."},{"key":"e_1_3_2_2_72_1","volume-title":"IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models. arXiv preprint arXiv:2308.06721","author":"Ye Hu","year":"2023","unstructured":"Hu Ye, Jun Zhang, Sibo Liu, Xiao Han, and Wei Yang. 2023. IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models. arXiv preprint arXiv:2308.06721 (2023)."},{"key":"e_1_3_2_2_73_1","volume-title":"Bisenet: Bilateral segmentation network for real-time semantic segmentation. In ECCV. 325\u2013341.","author":"Yu Changqian","year":"2018","unstructured":"Changqian Yu, Jingbo Wang, Chao Peng, Changxin Gao, Gang Yu, and Nong Sang. 2018. Bisenet: Bilateral segmentation network for real-time semantic segmentation. In ECCV. 325\u2013341."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"crossref","unstructured":"Ye Yu Abhimitra Meka Mohamed Elgharib Hans-Peter Seidel Christian Theobalt and William\u00a0AP Smith. 2020. Self-supervised outdoor scene relighting. In ECCV. 84\u2013101.","DOI":"10.1007\/978-3-030-58542-6_6"},{"key":"e_1_3_2_2_75_1","volume-title":"Paint3D: Paint Anything 3D with Lighting-Less Texture Diffusion Models. arXiv preprint arXiv:2312.13913","author":"Zeng Xianfang","year":"2023","unstructured":"Xianfang Zeng, Xin Chen, Zhongqi Qi, Wen Liu, Zibo Zhao, Zhibin Wang, Bin Fu, Yong Liu, and Gang Yu. 2023. Paint3D: Paint Anything 3D with Lighting-Less Texture Diffusion Models. arXiv preprint arXiv:2312.13913 (2023)."},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592094"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"crossref","unstructured":"Lvmin Zhang Anyi Rao and Maneesh Agrawala. 2023b. Adding conditional control to text-to-image diffusion models. In CVPR. 3836\u20133847.","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"crossref","unstructured":"Richard Zhang Phillip Isola Alexei\u00a0A Efros Eli Shechtman and Oliver Wang. 2018. The unreasonable effectiveness of deep features as a perceptual metric. In CVPR. 586\u2013595.","DOI":"10.1109\/CVPR.2018.00068"}],"event":{"name":"SIGGRAPH '24: Special Interest Group on Computer Graphics and Interactive Techniques Conference","location":"Denver CO USA","acronym":"SIGGRAPH '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657396","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3641519.3657396","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:35Z","timestamp":1750295375000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657396"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,13]]},"references-count":78,"alternative-id":["10.1145\/3641519.3657396","10.1145\/3641519"],"URL":"https:\/\/doi.org\/10.1145\/3641519.3657396","relation":{},"subject":[],"published":{"date-parts":[[2024,7,13]]},"assertion":[{"value":"2024-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}