{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:48:48Z","timestamp":1774021728922,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"name":"Hong Kong RGC, Generative AI Research, Development Centre from InnoHK","award":["T45-205\/21-N"],"award-info":[{"award-number":["T45-205\/21-N"]}]},{"DOI":"10.13039\/501100019491","name":"National Natural Science Foundation of China - State Grid Corporation Joint Fund for Smart Grid","doi-asserted-by":"publisher","award":["62372480"],"award-info":[{"award-number":["62372480"]}],"id":[{"id":"10.13039\/501100019491","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["2023A1515012839"],"award-info":[{"award-number":["2023A1515012839"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730722","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["CMD: Controllable Multiview Diffusion for 3D Editing and Progressive Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1066-6642","authenticated-orcid":false,"given":"Peng","family":"Li","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4675-7343","authenticated-orcid":false,"given":"Suizhi","family":"Ma","sequence":"additional","affiliation":[{"name":"Johns Hopkins University, Baltimore, Maryland, 
USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6589-8792","authenticated-orcid":false,"given":"Jialiang","family":"Chen","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2933-5667","authenticated-orcid":false,"given":"Yuan","family":"Liu","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4259-2863","authenticated-orcid":false,"given":"Congyi","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of British Columbia, Vancouver, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4942-7748","authenticated-orcid":false,"given":"Wei","family":"Xue","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5697-4168","authenticated-orcid":false,"given":"Wenhan","family":"Luo","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9251-3716","authenticated-orcid":false,"given":"Alla","family":"Sheffer","sequence":"additional","affiliation":[{"name":"University of British Columbia, Vancouver, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2284-3952","authenticated-orcid":false,"given":"Wenping","family":"Wang","sequence":"additional","affiliation":[{"name":"Texas A&M University, College Station, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8401-282X","authenticated-orcid":false,"given":"Yike","family":"Guo","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, Hong 
Kong"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_34"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"crossref","unstructured":"Amir Barda Vladimir\u00a0G. Kim Noam Aigerman Amit\u00a0H. Bermano and Thibault Groueix. 2024. MagicClay: Sculpting Meshes With Generative Neural Fields. SIGGRAPH Asia (Conference track) (2024).","DOI":"10.1145\/3680528.3687627"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/280811.280992"},{"key":"e_1_3_3_2_5_1","unstructured":"Hansheng Chen Ruoxi Shi Yulin Liu Bokui Shen Jiayuan Gu Gordon Wetzstein Hao Su and Leonidas Guibas. 2024d. Generic 3D Diffusion Adapter Using Controlled Multi-View Editing. arxiv:https:\/\/arXiv.org\/abs\/2403.12032\u00a0[cs.CV]"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"crossref","unstructured":"Minghao Chen Iro Laina and Andrea Vedaldi. 2024b. DGE: Direct Gaussian 3D Editing by Consistent Multi-view Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.18929 (2024).","DOI":"10.1007\/978-3-031-72904-1_5"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02029"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687630"},{"key":"e_1_3_3_2_10_1","unstructured":"Xinhua Cheng Tianyu Yang Jianan Wang Yu Li Lei Zhang Jian Zhang and Li Yuan. 2023. Progressive3d: Progressively local editing for text-to-3d content creation with complex semantic prompts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.11784 (2023)."},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"crossref","unstructured":"Wenqi Dong Bangbang Yang Lin Ma Xiao Liu Liyuan Cui Hujun Bao Yuewen Ma and Zhaopeng Cui. 2024. 
Coin3D: Controllable and Interactive 3D Assets Generation with Proxy-Guided Conditioning. (2024). arxiv:https:\/\/arXiv.org\/abs\/2405.08054\u00a0[cs.GR]","DOI":"10.1145\/3641519.3657425"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"crossref","unstructured":"Thomas Funkhouser Michael Kazhdan Philip Shilane Patrick Min William Kiefer Ayellet Tal Szymon Rusinkiewicz and David Dobkin. 2004. Modeling by example. ACM transactions on graphics (TOG) 23 3 (2004) 652\u2013663.","DOI":"10.1145\/1015706.1015775"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591552"},{"key":"e_1_3_3_2_15_1","unstructured":"William Gao Dilin Wang Yuchen Fan Alja\u017e Bo\u017ei\u010d Tuur Stuyck Zhengqin Li Zhao Dong Rakesh Ranjan and Nikolaos Sarafianos. 2024. 3D Mesh Editing using Masked LRMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.08641 (2024)."},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/258734.258849"},{"key":"e_1_3_3_2_17_1","unstructured":"Zekai Gu Rui Yan Jiahao Lu Peng Li Zhiyang Dou Chenyang Si Zhen Dong Qifeng Liu Cheng Lin Ziwei Liu Wenping Wang and Yuan Liu. 2025. Diffusion as Shader: 3D-aware Video Diffusion for Versatile Video Generation Control. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.03847 (2025)."},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"e_1_3_3_2_19_1","volume-title":"NeurIPS","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. In NeurIPS."},{"key":"e_1_3_3_2_20_1","volume-title":"ICLR","author":"Hong Yicong","year":"2024","unstructured":"Yicong Hong, Kai Zhang, Jiuxiang Gu, Sai Bi, Yang Zhou, Difan Liu, Feng Liu, Kalyan Sunkavalli, Trung Bui, and Hao Tan. 2024. LRM: Large reconstruction model for single image to 3d. 
In ICLR."},{"key":"e_1_3_3_2_21_1","unstructured":"Zehuan Huang Yuanchen Guo Haoran Wang Ran Yi Lizhuang Ma Yan-Pei Cao and Lu Sheng. 2024. MV-Adapter: Multi-view Consistent Image Generation Made Easy. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.03632 (2024)."},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2614028.2615427"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"crossref","unstructured":"Evangelos Kalogerakis Siddhartha Chaudhuri Daphne Koller and Vladlen Koltun. 2012. A probabilistic model for component-based shape synthesis. Acm Transactions on Graphics (TOG) 31 4 (2012) 1\u201311.","DOI":"10.1145\/2185520.2185551"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"crossref","unstructured":"Bernhard Kerbl Georgios Kopanas Thomas Leimk\u00fchler and George Drettakis. 2023. 3D Gaussian Splatting for Real-Time Radiance Field Rendering. ACM Transactions on Graphics 42 4 (July 2023). https:\/\/repo-sam.inria.fr\/fungraph\/3d-gaussian-splatting\/","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"crossref","unstructured":"Samuli Laine Janne Hellsten Tero Karras Yeongho Seol Jaakko Lehtinen and Timo Aila. 2020. Modular Primitives for High-Performance Differentiable Rendering. ACM Transactions on Graphics 39 6 (2020).","DOI":"10.1145\/3414685.3417861"},{"key":"e_1_3_3_2_27_1","unstructured":"Mengfei Li Xiaoxiao Long Yixun Liang Weiyu Li Yuan Liu Peng Li Xiaowei Chi Xingqun Qi Wei Xue Wenhan Luo et\u00a0al. 2024c. M-lrm: Multi-view large reconstruction model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.07648 (2024)."},{"key":"e_1_3_3_2_28_1","unstructured":"Peng Li Yuan Liu Xiaoxiao Long Feihu Zhang Cheng Lin Mengfei Li Xingqun Qi Shanghang Zhang Wenhan Luo Ping Tan et\u00a0al. 2024a. Era3D: High-Resolution Multiview Diffusion using Efficient Row-wise Attention. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.11616 (2024)."},{"key":"e_1_3_3_2_29_1","unstructured":"Peng Li Wangguandong Zheng Yuan Liu Tao Yu Yangguang Li Xingqun Qi Mengfei Li Xiaowei Chi Siyu Xia Wei Xue et\u00a0al. 2024d. PSHuman: Photorealistic Single-view Human Reconstruction using Cross-Scale Diffusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.10141 (2024)."},{"key":"e_1_3_3_2_30_1","unstructured":"Weiyu Li Jiarui Liu Hongyu Yan Rui Chen Yixun Liang Xuelin Chen Ping Tan and Xiaoxiao Long. 2024b. CraftsMan3D: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.14979 (2024)."},{"key":"e_1_3_3_2_31_1","unstructured":"Yuhan Li Yishun Dou Yue Shi Yu Lei Xuanhong Chen Yi Zhang Peng Zhou and Bingbing Ni. 2023. FocalDreamer: Text-driven 3D Editing via Focal-fusion Assembly. arxiv:https:\/\/arXiv.org\/abs\/2308.10608\u00a0[cs.CV]"},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV66043.2025.00107"},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"e_1_3_3_2_35_1","volume-title":"ICLR","author":"Liu Yuan","year":"2024","unstructured":"Yuan Liu, Cheng Lin, Zijiao Zeng, Xiaoxiao Long, Lingjie Liu, Taku Komura, and Wenping Wang. 2024. SyncDreamer: Generating multiview-consistent images from a single-view image. In ICLR."},{"key":"e_1_3_3_2_36_1","volume-title":"International Conference on Learning Representations","author":"Liu Zhen","year":"2023","unstructured":"Zhen Liu, Yao Feng, Michael\u00a0J. Black, Derek Nowrouzezahrai, Liam Paull, and Weiyang Liu. 2023a. MeshDiffusion: Score-based Generative 3D Mesh Modeling. In International Conference on Learning Representations. 
https:\/\/openreview.net\/forum?id=0cpM2ApF9p6"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"crossref","unstructured":"Aryan Mikaeili Or Perel Mehdi Safaee Daniel Cohen-Or and Ali Mahdavi-Amiri. 2023. SKED: Sketch-guided Text-based 3D Editing. ICCV (2023).","DOI":"10.1109\/ICCV51070.2023.01343"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"crossref","unstructured":"Ben Mildenhall Pratul\u00a0P Srinivasan Matthew Tancik Jonathan\u00a0T Barron Ravi Ramamoorthi and Ren Ng. 2021. NeRF: Representing scenes as neural radiance fields for view synthesis. Commun. ACM 65 1 (2021) 99\u2013106.","DOI":"10.1145\/3503250"},{"key":"e_1_3_3_2_40_1","unstructured":"Yeongtak Oh Jooyoung Choi Yongsung Kim Minjun Park Chaehun Shin and Sungroh Yoon. 2023. ControlDreamer: Blending Geometry and Style in Text-to-3D. arXiv:https:\/\/arXiv.org\/abs\/2312.01129 (2023)."},{"key":"e_1_3_3_2_41_1","unstructured":"OpenArt. 2023. OpenArt. https:\/\/openart.ai."},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"crossref","unstructured":"Werner Palfinger. 2022. Continuous remeshing for inverse rendering. Computer Animation and Virtual Worlds 33 5 (2022) e2101.","DOI":"10.1002\/cav.2101"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00025"},{"key":"e_1_3_3_2_44_1","unstructured":"Ben Poole Ajay Jain Jonathan\u00a0T Barron and Ben Mildenhall. 2022. DreamFusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2209.14988 (2022)."},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00093"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_47_1","unstructured":"Benet\u00a0Oriol Sabat Alessandro Achille Matthew Trager and Stefano Soatto. 2024. NeRF-Insert: 3D Local Editing with Multimodal Control Signals. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.19204 (2024)."},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1179352.1141930"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00046"},{"key":"e_1_3_3_2_50_1","unstructured":"Ruoxi Shi Hansheng Chen Zhuoyang Zhang Minghua Liu Chao Xu Xinyue Wei Linghao Chen Chong Zeng and Hao Su. 2023. Zero123++: a Single Image to Consistent Multi-view Diffusion Base Model. arxiv:https:\/\/arXiv.org\/abs\/2310.15110\u00a0[cs.CV]"},{"key":"e_1_3_3_2_51_1","doi-asserted-by":"crossref","unstructured":"Jiaxiang Tang Zhaoxi Chen Xiaokang Chen Tengfei Wang Gang Zeng and Ziwei Liu. 2024. LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.05054 (2024).","DOI":"10.1007\/978-3-031-73235-5_1"},{"key":"e_1_3_3_2_52_1","volume-title":"NeurIPS","author":"Wang Peng","year":"2021","unstructured":"Peng Wang, Lingjie Liu, Yuan Liu, Christian Theobalt, Taku Komura, and Wenping Wang. 2021. NeuS: Learning neural implicit surfaces by volume rendering for multi-view reconstruction. In NeurIPS."},{"key":"e_1_3_3_2_53_1","unstructured":"Zhengyi Wang Cheng Lu Yikai Wang Fan Bao Chongxuan Li Hang Su and Jun Zhu. 2024. Prolificdreamer: High-fidelity and diverse text-to-3d generation with variational score distillation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_54_1","unstructured":"Kailu Wu Fangfu Liu Zhihan Cai Runjie Yan Hanyang Wang Yating Hu Yueqi Duan and Kaisheng Ma. 2024b. Unique3D: High-Quality and Efficient 3D Mesh Generation from a Single Image. arxiv:https:\/\/arXiv.org\/abs\/2405.20343\u00a0[cs.CV]"},{"key":"e_1_3_3_2_55_1","unstructured":"Shuang Wu Youtian Lin Feihu Zhang Yifei Zeng Jingxi Xu Philip Torr Xun Cao and Yao Yao. 2024a. Direct3D: Scalable Image-to-3D Generation via 3D Latent Diffusion Transformer. 
arXiv:https:\/\/arXiv.org\/abs\/2405.14832 (2024)."},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"crossref","unstructured":"Jianfeng Xiang Zelong Lv Sicheng Xu Yu Deng Ruicheng Wang Bowen Zhang Dong Chen Xin Tong and Jiaolong Yang. 2024. Structured 3D Latents for Scalable and Versatile 3D Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.01506 (2024).","DOI":"10.1109\/CVPR52734.2025.02000"},{"key":"e_1_3_3_2_57_1","unstructured":"Jiale Xu Weihao Cheng Yiming Gao Xintao Wang Shenghua Gao and Ying Shan. 2024. Instantmesh: Efficient 3d mesh generation from a single image with sparse-view large reconstruction models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.07191 (2024)."},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"crossref","unstructured":"Lvmin Zhang Anyi Rao and Maneesh Agrawala. 2023. Adding Conditional Control to Text-to-Image Diffusion Models.","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"crossref","unstructured":"Longwen Zhang Ziyu Wang Qixuan Zhang Qiwei Qiu Anqi Pang Haoran Jiang Wei Yang Lan Xu and Jingyi Yu. 2024. CLAY: A Controllable Large-scale Generative Model for Creating High-quality 3D Assets. ACM Transactions on Graphics (TOG) 43 4 (2024) 1\u201320.","DOI":"10.1145\/3658146"},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"crossref","unstructured":"Jingyu Zhuang Di Kang Yan-Pei Cao Guanbin Li Liang Lin and Ying Shan. 2024. TIP-Editor: An Accurate 3D Editor Following Both Text-Prompts And Image-Prompts. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.14828 (2024).","DOI":"10.1145\/3658205"},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618190"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730722","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:55:59Z","timestamp":1774018559000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730722"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":60,"alternative-id":["10.1145\/3721238.3730722","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730722","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}