{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:46:35Z","timestamp":1774021595094,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730648","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["RELATE3D: REfocusing Latent Adapter for Targeted local Enhancement and Editing in 3D Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-4160-7560","authenticated-orcid":false,"given":"Xiao-Lei","family":"Li","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China and Tencent Video AI Center, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4357-4226","authenticated-orcid":false,"given":"Hao-Xiang","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7181-1781","authenticated-orcid":false,"given":"Yanni","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tencent Video AI Center, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2805-3692","authenticated-orcid":false,"given":"Kai","family":"Ma","sequence":"additional","affiliation":[{"name":"Tencent PCG, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3960-7525","authenticated-orcid":false,"given":"Alan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Tencent Video AI Center, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9197-346X","authenticated-orcid":false,"given":"Tai-Jiang","family":"Mu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0002-5252","authenticated-orcid":false,"given":"Hao-Xiang","family":"Guo","sequence":"additional","affiliation":[{"name":"Skywork AI, Kunlun Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3808-281X","authenticated-orcid":false,"given":"Ran","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tencent Video AI Center, New York, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Aiech. 2024. PixVerse. https:\/\/app.pixverse.ai"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","unstructured":"Rohan Anil Sebastian Borgeaud Yonghui Wu Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew\u00a0M. Dai Anja Hauth Katie Millican David Silver Slav Petrov Melvin Johnson Ioannis Antonoglou Julian Schrittwieser Amelia Glaese Jilin Chen Emily Pitler Timothy\u00a0P. Lillicrap Angeliki Lazaridou Orhan Firat James Molloy Michael Isard Paul\u00a0Ronald Barham Tom Hennigan Benjamin Lee Fabio Viola Malcolm Reynolds Yuanzhong Xu Ryan Doherty Eli Collins Clemens Meyer Eliza Rutherford Erica Moreira Kareem Ayoub Megha Goel George Tucker Enrique Piqueras Maxim Krikun Iain Barr Nikolay Savinov Ivo Danihelka Becca Roelofs Ana\u00efs White Anders Andreassen Tamara von Glehn Lakshman Yagati Mehran Kazemi Lucas Gonzalez Misha Khalman Jakub Sygnowski and et al.2023. Gemini: A Family of Highly Capable Multimodal Models. CoRR abs\/2312.11805 (2023). 10.48550\/ARXIV.2312.11805 arXiv:https:\/\/arXiv.org\/abs\/2312.11805","DOI":"10.48550\/ARXIV.2312.11805"},{"key":"e_1_3_3_2_4_1","unstructured":"Black Forest Labs Inc.2025. FLUX 1.1 Pro. https:\/\/blackforestlabs.ai\/1-1-pro Accessed: 2025-01-22."},{"key":"e_1_3_3_2_5_1","unstructured":"Tim Brooks Bill Peebles Connor Holmes Will DePue Yufei Guo Li Jing David Schnurr Joe Taylor Troy Luhman Eric Luhman Clarence Ng Ricky Wang and Aditya Ramesh. 2024. Video generation models as world simulators. (2024). https:\/\/openai.com\/research\/video-generation-models-as-world-simulators"},{"key":"e_1_3_3_2_6_1","unstructured":"Hansheng Chen Bokui Shen Yulin Liu Ruoxi Shi Linqi Zhou Connor\u00a0Z Lin Jiayuan Gu Hao Su Gordon Wetzstein and Leonidas Guibas. 2024d. 3D-Adapter: Geometry-Consistent Multi-View Diffusion for High-Quality 3D Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.18974 (2024)."},{"key":"e_1_3_3_2_7_1","unstructured":"Hansheng Chen Ruoxi Shi Yulin Liu Bokui Shen Jiayuan Gu Gordon Wetzstein Hao Su and Leonidas Guibas. 2024e. Generic 3D Diffusion Adapter Using Controlled Multi-View Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.12032 (2024)."},{"key":"e_1_3_3_2_8_1","volume-title":"The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024","author":"Chen Junsong","year":"2024","unstructured":"Junsong Chen, Jincheng Yu, Chongjian Ge, Lewei Yao, Enze Xie, Zhongdao Wang, James\u00a0T. Kwok, Ping Luo, Huchuan Lu, and Zhenguo Li. 2024h. PixArt-\u03b1 : Fast Training of Diffusion Transformer for Photorealistic Text-to-Image Synthesis. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net. https:\/\/openreview.net\/forum?id=eAKmQPe3m1"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02498"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","unstructured":"Sijin Chen Xin Chen Anqi Pang Xianfang Zeng Wei Cheng Yijun Fu Fukun Yin Yanru Wang Zhibin Wang Chi Zhang Jingyi Yu Gang Yu Bin Fu and Tao Chen. 2024a. MeshXL: Neural Coordinate Field for Generative 3D Foundation Models. CoRR abs\/2405.20853 (2024). 10.48550\/ARXIV.2405.20853 arXiv:https:\/\/arXiv.org\/abs\/2405.20853","DOI":"10.48550\/ARXIV.2405.20853"},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2669"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","unstructured":"Yiwen Chen Tong He Di Huang Weicai Ye Sijin Chen Jiaxiang Tang Xin Chen Zhongang Cai Lei Yang Gang Yu Guosheng Lin and Chi Zhang. 2024c. MeshAnything: Artist-Created Mesh Generation with Autoregressive Transformers. CoRR abs\/2406.10163 (2024). 10.48550\/ARXIV.2406.10163 arXiv:https:\/\/arXiv.org\/abs\/2406.10163","DOI":"10.48550\/ARXIV.2406.10163"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","unstructured":"Zhaoxi Chen Jiaxiang Tang Yuhao Dong Ziang Cao Fangzhou Hong Yushi Lan Tengfei Wang Haozhe Xie Tong Wu Shunsuke Saito Liang Pan Dahua Lin and Ziwei Liu. 2024f. 3DTopia-XL: Scaling High-quality 3D Asset Generation via Primitive Diffusion. CoRR abs\/2409.12957 (2024). 10.48550\/ARXIV.2409.12957 arXiv:https:\/\/arXiv.org\/abs\/2409.12957","DOI":"10.48550\/ARXIV.2409.12957"},{"key":"e_1_3_3_2_14_1","unstructured":"Deemos Technologies Inc.2025. Hyper3D. https:\/\/hyper3d.ai\/ Accessed: 2025-01-22."},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"crossref","unstructured":"Ziya Erko\u00e7 Can G\u00fcmeli Chaoyang Wang Matthias Nie\u00dfner Angela Dai Peter Wonka Hsin-Ying Lee and Peiye Zhuang. 2024. PrEditor3D: Fast and Precise 3D Shape Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.06592 (2024).","DOI":"10.1109\/CVPR52734.2025.00068"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","unstructured":"Rinon Gal Or Patashnik Haggai Maron Amit\u00a0H. Bermano Gal Chechik and Daniel Cohen-Or. 2022. StyleGAN-NADA: CLIP-guided domain adaptation of image generators. ACM Trans. Graph. 41 4 (2022) 141:1\u2013141:13. 10.1145\/3528223.3530164","DOI":"10.1145\/3528223.3530164"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"e_1_3_3_2_18_1","first-page":"6840","volume-title":"Advances in Neural Information Processing Systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems , H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 6840\u20136851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_3_2_19_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Hong Yicong","year":"2024","unstructured":"Yicong Hong, Kai Zhang, Jiuxiang Gu, Sai Bi, Yang Zhou, Difan Liu, Feng Liu, Kalyan Sunkavalli, Trung Bui, and Hao Tan. 2024. LRM: Large Reconstruction Model for Single Image to 3D. In International Conference on Learning Representations (ICLR). https:\/\/openreview.net\/forum?id=sllU8vvsFF"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618144"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657412"},{"key":"e_1_3_3_2_22_1","unstructured":"Heewoo Jun and Alex Nichol. 2023. Shap-E: Generating Conditional 3D Implicit Functions. arxiv:https:\/\/arXiv.org\/abs\/2305.02463\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2305.02463"},{"key":"e_1_3_3_2_23_1","unstructured":"KuaiShou. 2024. kling. https:\/\/klingai.com"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73235-5_7"},{"key":"e_1_3_3_2_25_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Li Jiahao","year":"2024","unstructured":"Jiahao Li, Hao Tan, Kai Zhang, Zexiang Xu, Fujun Luan, Yinghao Xu, Yicong Hong, Kalyan Sunkavalli, Greg Shakhnarovich, and Sai Bi. 2024b. Instant3D: Fast Text-to-3D with Sparse-view Generation and Large Reconstruction Model. In International Conference on Learning Representations (ICLR). https:\/\/openreview.net\/forum?id=2lDQLiH1W4"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","unstructured":"Weiyu Li Jiarui Liu Rui Chen Yixun Liang Xuelin Chen Ping Tan and Xiaoxiao Long. 2024a. CraftsMan: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner. CoRR abs\/2405.14979 (2024). 10.48550\/ARXIV.2405.14979 arXiv:https:\/\/arXiv.org\/abs\/2405.14979","DOI":"10.48550\/ARXIV.2405.14979"},{"key":"e_1_3_3_2_27_1","unstructured":"Meshy LLC.2025. Meshy - Convert Text and Images to 3D Models. https:\/\/www.meshy.ai Accessed: 2025-01-22."},{"key":"e_1_3_3_2_28_1","volume-title":"7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"e_1_3_3_2_29_1","unstructured":"Baorui Ma Huachen Gao Haoge Deng Zhengxiong Luo Tiejun Huang Lulu Tang and Xinlong Wang. 2024. You See it You Got it: Learning 3D Creation on Pose-Free Videos at Scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.06699 (2024)."},{"key":"e_1_3_3_2_30_1","unstructured":"Midjourney Inc.2025. Midjourney. https:\/\/www.midjourney.com Accessed: 2025-01-22."},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"publisher","unstructured":"Alex Nichol Heewoo Jun Prafulla Dhariwal Pamela Mishkin and Mark Chen. 2022. Point-E: A System for Generating 3D Point Clouds from Complex Prompts. CoRR abs\/2212.08751 (2022). 10.48550\/ARXIV.2212.08751 arXiv:https:\/\/arXiv.org\/abs\/2212.08751","DOI":"10.48550\/ARXIV.2212.08751"},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv preprint (2023). 10.48550\/arXiv.2303.08774","DOI":"10.48550\/arXiv.2303.08774"},{"key":"e_1_3_3_2_33_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy\u00a0V. Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby Mido Assran Nicolas Ballas Wojciech Galuba Russell Howes Po-Yao Huang Shang-Wen Li Ishan Misra Michael Rabbat Vasu Sharma Gabriel Synnaeve Hu Xu Herv\u00e9 J\u00e9gou Julien Mairal Patrick Labatut Armand Joulin and Piotr Bojanowski. 2024. DINOv2: Learning Robust Visual Features without Supervision. Trans. Mach. Learn. Res. 2024 (2024). https:\/\/openreview.net\/forum?id=a68SUt6zFt"},{"key":"e_1_3_3_2_34_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Poole Ben","year":"2023","unstructured":"Ben Poole, Ajay Jain, Jonathan\u00a0T. Barron, and Ben Mildenhall. 2023. DreamFusion: Text-to-3D using 2D Diffusion. In International Conference on Learning Representations (ICLR). https:\/\/openreview.net\/forum?id=FjNys5c7VyY"},{"key":"e_1_3_3_2_35_1","unstructured":"Zhangyang Qi Yunhan Yang Mengchen Zhang Long Xing Xiaoyang Wu Tong Wu Dahua Lin Xihui Liu Jiaqi Wang and Hengshuang Zhao. 2024. Tailor3d: Customized 3d assets editing and generation with dual-side images. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.06191 (2024)."},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00946"},{"key":"e_1_3_3_2_37_1","first-page":"8748","volume-title":"International Conference on Machine Learning (ICML)","volume":"139","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning (ICML) , Vol.\u00a0139. 8748\u20138763."},{"key":"e_1_3_3_2_38_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et\u00a0al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","unstructured":"Tianhe Ren Shilong Liu Ailing Zeng Jing Lin Kunchang Li He Cao Jiayu Chen Xinyu Huang Yukang Chen Feng Yan Zhaoyang Zeng Hao Zhang Feng Li Jie Yang Hongyang Li Qing Jiang and Lei Zhang. 2024b. Grounded SAM: Assembling Open-World Models for Diverse Visual Tasks. CoRR abs\/2401.14159 (2024). 10.48550\/ARXIV.2401.14159 arXiv:https:\/\/arXiv.org\/abs\/2401.14159","DOI":"10.48550\/ARXIV.2401.14159"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00403"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657461"},{"key":"e_1_3_3_2_43_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Shi Yichun","year":"2024","unstructured":"Yichun Shi, Peng Wang, Jianglong Ye, Long Mai, Kejie Li, and Xiao Yang. 2024. MVDream: Multi-view Diffusion for 3D Generation. In International Conference on Learning Representations (ICLR). https:\/\/openreview.net\/forum?id=FUgrjq2pbB"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02000"},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","unstructured":"Yawar Siddiqui Tom Monnier Filippos Kokkinos Mahendra Kariya Yanir Kleiman Emilien Garreau Oran Gafni Natalia Neverova Andrea Vedaldi Roman Shapovalov and David Novotn\u00fd. 2024. Meta 3D AssetGen: Text-to-Mesh Generation with High-Quality Geometry Texture and PBR Materials. CoRR abs\/2407.02445 (2024). 10.48550\/ARXIV.2407.02445 arXiv:https:\/\/arXiv.org\/abs\/2407.02445","DOI":"10.48550\/ARXIV.2407.02445"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00184"},{"key":"e_1_3_3_2_47_1","volume-title":"European Conference Computer Vision (ECCV)","author":"Tang Jiaxiang","year":"2024","unstructured":"Jiaxiang Tang, Zhaoxi Chen, Xiaokang Chen, Tengfei Wang, Gang Zeng, and Ziwei Liu. 2024. LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation. In European Conference Computer Vision (ECCV)."},{"key":"e_1_3_3_2_48_1","unstructured":"VAST Inc.2025. Tripo AI. https:\/\/www.tripo3d.ai Accessed: 2025-01-22."},{"key":"e_1_3_3_2_49_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention Is All You Need. CoRR abs\/1706.03762 (2017). arXiv:https:\/\/arXiv.org\/abs\/1706.03762http:\/\/arxiv.org\/abs\/1706.03762"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73232-4_25"},{"key":"e_1_3_3_2_51_1","doi-asserted-by":"publisher","unstructured":"Zhengyi Wang Jonathan Lorraine Yikai Wang Hang Su Jun Zhu Sanja Fidler and Xiaohui Zeng. 2024. LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models. CoRR abs\/2411.09595 (2024). 10.48550\/ARXIV.2411.09595 arXiv:https:\/\/arXiv.org\/abs\/2411.09595","DOI":"10.48550\/ARXIV.2411.09595"},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","unstructured":"Shuang Wu Youtian Lin Feihu Zhang Yifei Zeng Jingxi Xu Philip Torr Xun Cao and Yao Yao. 2024a. Direct3D: Scalable Image-to-3D Generation via 3D Latent Diffusion Transformer. CoRR abs\/2405.14832 (2024). 10.48550\/ARXIV.2405.14832 arXiv:https:\/\/arXiv.org\/abs\/2405.14832","DOI":"10.48550\/ARXIV.2405.14832"},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02098"},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"crossref","unstructured":"Jianfeng Xiang Zelong Lv Sicheng Xu Yu Deng Ruicheng Wang Bowen Zhang Dong Chen Xin Tong and Jiaolong Yang. 2024. Structured 3D Latents for Scalable and Versatile 3D Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.01506 (2024).","DOI":"10.1109\/CVPR52734.2025.02000"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","unstructured":"Bojun Xiong Si-Tong Wei Xin-Yang Zheng Yan-Pei Cao Zhouhui Lian and Peng-Shuai Wang. 2024. OctFusion: Octree-based Diffusion Models for 3D Shape Generation. CoRR abs\/2408.14732 (2024). 10.48550\/ARXIV.2408.14732 arXiv:https:\/\/arXiv.org\/abs\/2408.14732","DOI":"10.48550\/ARXIV.2408.14732"},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"publisher","unstructured":"Jiale Xu Weihao Cheng Yiming Gao Xintao Wang Shenghua Gao and Ying Shan. 2024. InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models. CoRR abs\/2404.07191 (2024). 10.48550\/ARXIV.2404.07191 arXiv:https:\/\/arXiv.org\/abs\/2404.07191","DOI":"10.48550\/ARXIV.2404.07191"},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"publisher","unstructured":"Hu Ye Jun Zhang Sibo Liu Xiao Han and Wei Yang. 2023. IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models. CoRR abs\/2308.06721 (2023). 10.48550\/ARXIV.2308.06721 arXiv:https:\/\/arXiv.org\/abs\/2308.06721","DOI":"10.48550\/ARXIV.2308.06721"},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"publisher","unstructured":"Bowen Zhang Yiji Cheng Jiaolong Yang Chunyu Wang Feng Zhao Yansong Tang Dong Chen and Baining Guo. 2024b. GaussianCube: Structuring Gaussian Splatting using Optimal Transport for 3D Generative Modeling. CoRR abs\/2403.19655 (2024). 10.48550\/ARXIV.2403.19655 arXiv:https:\/\/arXiv.org\/abs\/2403.19655","DOI":"10.48550\/ARXIV.2403.19655"},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","unstructured":"Biao Zhang Jiapeng Tang Matthias Nie\u00dfner and Peter Wonka. 2023. 3DShape2VecSet: A 3D Shape Representation for Neural Fields and Generative Diffusion Models. ACM Trans. Graph. 42 4 Article 92 (jul 2023) 16\u00a0pages. 10.1145\/3592442","DOI":"10.1145\/3592442"},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72670-5_1"},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"publisher","unstructured":"Longwen Zhang Ziyu Wang Qixuan Zhang Qiwei Qiu Anqi Pang Haoran Jiang Wei Yang Lan Xu and Jingyi Yu. 2024c. CLAY: A Controllable Large-scale Generative Model for Creating High-quality 3D Assets. ACM Trans. Graph. 43 4 (2024) 120:1\u2013120:20. 10.1145\/3658146","DOI":"10.1145\/3658146"},{"key":"e_1_3_3_2_62_1","unstructured":"Zibo Zhao Zeqiang Lai Qingxiang Lin Yunfei Zhao Haolin Liu Shuhui Yang Yifei Feng Mingxin Yang Sheng Zhang Xianghui Yang Huiwen Shi Sicong Liu Junta Wu Yihang Lian Fan Yang Ruining Tang Zebin He Xinzhou Wang Jian Liu Xuhui Zuo Zhuo Chen Biwen Lei Haohan Weng Jing Xu Yiling Zhu Xinhai Liu Lixin Xu Changrong Hu Tianyu Huang Lifu Wang Jihong Zhang Meng Chen Liang Dong Yiwen Jia Yulin Cai Jiaao Yu Yixuan Tang Hao Zhang Zheng Ye Peng He Runzhou Wu Chao Zhang Yonghao Tan Jie Xiao Yangyu Tao Jianchen Zhu Jinbao Xue Kai Liu Chongqing Zhao Xinming Wu Zhichao Hu Lei Qin Jianbing Peng Zhan Li Minghui Chen Xipeng Zhang Lin Niu Paige Wang Yingkai Wang Haozhao Kuang Zhongyi Fan Xu Zheng Weihao Zhuang YingPing He Tian Liu Yong Yang Di Wang Yuhong Liu Jie Jiang Jingwei Huang and Chunchao Guo. 2025. Hunyuan3D 2.0: Scaling Diffusion Models for High Resolution Textured 3D Assets Generation. arxiv:https:\/\/arXiv.org\/abs\/2501.12202\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2501.12202"},{"key":"e_1_3_3_2_63_1","volume-title":"Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023","author":"Zhao Zibo","year":"2023","unstructured":"Zibo Zhao, Wen Liu, Xin Chen, Xianfang Zeng, Rui Wang, Pei Cheng, Bin Fu, Tao Chen, Gang Yu, and Shenghua Gao. 2023. Michelangelo: Conditional 3D Shape Generation based on Shape-Image-Text Aligned Latent Representation. In Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023, Alice Oh, Tristan Naumann, Amir Globerson, Kate Saenko, Moritz Hardt, and Sergey Levine (Eds.). http:\/\/papers.nips.cc\/paper_files\/paper\/2023\/hash\/ea1a7f7bc0fc14142106a84c94c826d0-Abstract-Conference.html"},{"key":"e_1_3_3_2_64_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Zhou Junsheng","year":"2024","unstructured":"Junsheng Zhou, Jinsheng Wang, Baorui Ma, Yu-Shen Liu, Tiejun Huang, and Xinlong Wang. 2024. Uni3d: Exploring unified 3d representation at scale. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_65_1","doi-asserted-by":"publisher","unstructured":"Jingyu Zhuang Di Kang Yan-Pei Cao Guanbin Li Liang Lin and Ying Shan. 2024. TIP-Editor: An Accurate 3D Editor Following Both Text-Prompts And Image-Prompts. ACM Trans. Graph. 43 4 (2024) 121:1\u2013121:12. 10.1145\/3658205","DOI":"10.1145\/3658205"},{"key":"e_1_3_3_2_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618190"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730648","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:51:11Z","timestamp":1774018271000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730648"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":65,"alternative-id":["10.1145\/3721238.3730648","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730648","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}