{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T02:39:47Z","timestamp":1774924787426,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":94,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,13]],"date-time":"2024-07-13T00:00:00Z","timestamp":1720828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,13]]},"DOI":"10.1145\/3641519.3657425","type":"proceedings-article","created":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T10:39:28Z","timestamp":1720780768000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["Coin3D: Controllable and Interactive 3D Assets Generation with Proxy-Guided Conditioning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2373-2647","authenticated-orcid":false,"given":"Wenqi","family":"Dong","sequence":"first","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7604-5553","authenticated-orcid":false,"given":"Bangbang","family":"Yang","sequence":"additional","affiliation":[{"name":"ByteDance, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2568-2735","authenticated-orcid":false,"given":"Lin","family":"Ma","sequence":"additional","affiliation":[{"name":"ByteDance, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6073-030X","authenticated-orcid":false,"given":"Xiao","family":"Liu","sequence":"additional","affiliation":[{"name":"ByteDance, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7074-3990","authenticated-orcid":false,"given":"Liyuan","family":"Cui","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2662-0334","authenticated-orcid":false,"given":"Hujun","family":"Bao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7734-7053","authenticated-orcid":false,"given":"Yuewen","family":"Ma","sequence":"additional","affiliation":[{"name":"ByteDance, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7130-439X","authenticated-orcid":false,"given":"Zhaopeng","family":"Cui","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"International conference on machine learning. PMLR, 40\u201349","author":"Achlioptas Panos","year":"2018","unstructured":"Panos Achlioptas, Olga Diamanti, Ioannis Mitliagkas, and Leonidas Guibas. 2018. Learning representations and generative models for 3d point clouds. In International conference on machine learning. PMLR, 40\u201349."},{"key":"e_1_3_2_2_2_1","volume-title":"GeneAvatar: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image. arXiv preprint arXiv:2404.02152","author":"Bao Chong","year":"2024","unstructured":"Chong Bao, Yinda Zhang, Yuan Li, Xiyu Zhang, Bangbang Yang, Hujun Bao, Marc Pollefeys, Guofeng Zhang, and Zhaopeng Cui. 2024. 
GeneAvatar: Generic Expression-Aware Volumetric Head Avatar Editing from a Single Image. arXiv preprint arXiv:2404.02152 (2024)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02004"},{"key":"e_1_3_2_2_4_1","volume-title":"Multidiffusion: Fusing diffusion paths for controlled image generation.","author":"Bar-Tal Omer","year":"2023","unstructured":"Omer Bar-Tal, Lior Yariv, Yaron Lipman, and Tali Dekel. 2023. Multidiffusion: Fusing diffusion paths for controlled image generation. (2023)."},{"key":"e_1_3_2_2_5_1","volume-title":"LooseControl: Lifting ControlNet for Generalized Depth Conditioning. arXiv preprint arXiv:2312.03079","author":"Bhat Shariq\u00a0Farooq","year":"2023","unstructured":"Shariq\u00a0Farooq Bhat, Niloy\u00a0J Mitra, and Peter Wonka. 2023. LooseControl: Lifting ControlNet for Generalized Depth Conditioning. arXiv preprint arXiv:2312.03079 (2023)."},{"key":"e_1_3_2_2_6_1","unstructured":"Ollin\u00a0Boer Bohan. 2023. Tiny AutoEncoder for Stable Diffusion. https:\/\/github.com\/madebyollin\/taesd. Accessed: 2023-10-03."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00574"},{"key":"e_1_3_2_2_9_1","volume-title":"Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:1512.03012","author":"Chang X","year":"2015","unstructured":"Angel\u00a0X Chang, Thomas Funkhouser, Leonidas Guibas, Pat Hanrahan, Qixing Huang, Zimo Li, Silvio Savarese, Manolis Savva, Shuran Song, Hao Su, 2015. Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:1512.03012 (2015)."},{"key":"e_1_3_2_2_10_1","volume-title":"Fantasia3d: Disentangling geometry and appearance for high-quality text-to-3d content creation. arXiv preprint arXiv:2303.13873","author":"Chen Rui","year":"2023","unstructured":"Rui Chen, Yongwei Chen, Ningxin Jiao, and Kui Jia. 2023a. Fantasia3d: Disentangling geometry and appearance for high-quality text-to-3d content creation. arXiv preprint arXiv:2303.13873 (2023)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612489"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19781-9_42"},{"key":"e_1_3_2_2_13_1","volume-title":"Progressive3d: Progressively local editing for text-to-3d content creation with complex semantic prompts. arXiv preprint arXiv:2310.11784","author":"Cheng Xinhua","year":"2023","unstructured":"Xinhua Cheng, Tianyu Yang, Jianan Wang, Yu Li, Lei Zhang, Jian Zhang, and Li Yuan. 2023b. Progressive3d: Progressively local editing for text-to-3d content creation with complex semantic prompts. arXiv preprint arXiv:2310.11784 (2023)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00433"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_38"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00314"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00431"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00823"},{"key":"e_1_3_2_2_20_1","volume-title":"The proposed uscf rating system, its development, theory, and applications. Chess life 22, 8","author":"Elo E","year":"1967","unstructured":"Arpad\u00a0E Elo. 1967. 
The proposed uscf rating system, its development, theory, and applications. Chess life 22, 8 (1967), 242\u2013247."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_36"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.264"},{"key":"e_1_3_2_2_23_1","first-page":"31841","article-title":"Get3d: A generative model of high quality 3d textured shapes learned from images","volume":"35","author":"Gao Jun","year":"2022","unstructured":"Jun Gao, Tianchang Shen, Zian Wang, Wenzheng Chen, Kangxue Yin, Daiqing Li, Or Litany, Zan Gojcic, and Sanja Fidler. 2022. Get3d: A generative model of high quality 3d textured shapes learned from images. Advances In Neural Information Processing Systems 35 (2022), 31841\u201331854.","journal-title":"Advances In Neural Information Processing Systems"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00030"},{"key":"e_1_3_2_2_25_1","volume-title":"Stylenerf: A style-based 3d-aware generator for high-resolution image synthesis. arXiv preprint arXiv:2110.08985","author":"Gu Jiatao","year":"2021","unstructured":"Jiatao Gu, Lingjie Liu, Peng Wang, and Christian Theobalt. 2021. Stylenerf: A style-based 3d-aware generator for high-resolution image synthesis. arXiv preprint arXiv:2110.08985 (2021)."},{"key":"e_1_3_2_2_26_1","volume-title":"Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725","author":"Guo Yuwei","year":"2023","unstructured":"Yuwei Guo, Ceyuan Yang, Anyi Rao, Yaohui Wang, Yu Qiao, Dahua Lin, and Bo Dai. 2023. Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)."},{"key":"e_1_3_2_2_27_1","unstructured":"Gustavosta. 2023. MagicPrompt. https:\/\/huggingface.co\/Gustavosta\/MagicPrompt-Stable-Diffusion. Accessed: 2023-10-03."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01381"},{"key":"e_1_3_2_2_29_1","volume-title":"Instruct-nerf2nerf: Editing 3d scenes with instructions. arXiv preprint arXiv:2303.12789","author":"Haque Ayaan","year":"2023","unstructured":"Ayaan Haque, Matthew Tancik, Alexei\u00a0A Efros, Aleksander Holynski, and Angjoo Kanazawa. 2023. Instruct-nerf2nerf: Editing 3d scenes with instructions. arXiv preprint arXiv:2303.12789 (2023)."},{"key":"e_1_3_2_2_30_1","unstructured":"Yuze He Yushi Bai Matthieu Lin Wang Zhao Yubin Hu Jenny Sheng Ran Yi Juanzi Li and Yong-Jin Liu. 2023. T3Bench: Benchmarking Current Progress in Text-to-3D Generation. arxiv:2310.02977\u00a0[cs.CV]"},{"key":"e_1_3_2_2_31_1","volume-title":"Avatarclip: Zero-shot text-driven generation and animation of 3d avatars. arXiv preprint arXiv:2205.08535","author":"Hong Fangzhou","year":"2022","unstructured":"Fangzhou Hong, Mingyuan Zhang, Liang Pan, Zhongang Cai, Lei Yang, and Ziwei Liu. 2022. Avatarclip: Zero-shot text-driven generation and animation of 3d avatars. arXiv preprint arXiv:2205.08535 (2022)."},{"key":"e_1_3_2_2_32_1","volume-title":"Lrm: Large reconstruction model for single image to 3d. arXiv preprint arXiv:2311.04400","author":"Hong Yicong","year":"2023","unstructured":"Yicong Hong, Kai Zhang, Jiuxiang Gu, Sai Bi, Yang Zhou, Difan Liu, Feng Liu, Kalyan Sunkavalli, Trung Bui, and Hao Tan. 2023. Lrm: Large reconstruction model for single image to 3d. 
arXiv preprint arXiv:2311.04400 (2023)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555513"},{"key":"e_1_3_2_2_34_1","volume-title":"Shap-e: Generating conditional 3d implicit functions. arXiv preprint arXiv:2305.02463","author":"Jun Heewoo","year":"2023","unstructured":"Heewoo Jun and Alex Nichol. 2023. Shap-e: Generating conditional 3d implicit functions. arXiv preprint arXiv:2305.02463 (2023)."},{"key":"e_1_3_2_2_35_1","volume-title":"Instruct 3D-to-3D: Text Instruction Guided 3D-to-3D conversion. arXiv preprint arXiv:2303.15780","author":"Kamata Hiromichi","year":"2023","unstructured":"Hiromichi Kamata, Yuiko Sakuma, Akio Hayakawa, Masato Ishii, and Takuya Narihira. 2023. Instruct 3D-to-3D: Text Instruction Guided 3D-to-3D conversion. arXiv preprint arXiv:2303.15780 (2023)."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_23"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01287"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01216"},{"key":"e_1_3_2_2_39_1","volume-title":"Focaldreamer: Text-driven 3d editing via focal-fusion assembly. arXiv preprint arXiv:2308.10608","author":"Li Yuhan","year":"2023","unstructured":"Yuhan Li, Yishun Dou, Yue Shi, Yu Lei, Xuanhong Chen, Yi Zhang, Peng Zhou, and Bingbing Ni. 2023a. Focaldreamer: Text-driven 3d editing via focal-fusion assembly. arXiv preprint arXiv:2308.10608 (2023)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"e_1_3_2_2_41_1","volume-title":"Fast single image to 3d objects with consistent multi-view generation and 3d diffusion. arXiv preprint arXiv:2311.07885","author":"Liu Minghua","year":"2023","unstructured":"Minghua Liu, Ruoxi Shi, Linghao Chen, Zhuoyang Zhang, Chao Xu, Xinyue Wei, Hansheng Chen, Chong Zeng, Jiayuan Gu, and Hao Su. 2023b. One-2-3-45++: Fast single image to 3d objects with consistent multi-view generation and 3d diffusion. arXiv preprint arXiv:2311.07885 (2023)."},{"key":"e_1_3_2_2_42_1","volume-title":"One-2-3-45: Any single image to 3d mesh in 45 seconds without per-shape optimization. arXiv preprint arXiv:2306.16928","author":"Liu Minghua","year":"2023","unstructured":"Minghua Liu, Chao Xu, Haian Jin, Linghao Chen, Zexiang Xu, Hao Su, 2023d. One-2-3-45: Any single image to 3d mesh in 45 seconds without per-shape optimization. arXiv preprint arXiv:2306.16928 (2023)."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"e_1_3_2_2_44_1","volume-title":"SyncDreamer: Generating Multiview-consistent Images from a Single-view Image. arXiv preprint arXiv:2309.03453","author":"Liu Yuan","year":"2023","unstructured":"Yuan Liu, Cheng Lin, Zijiao Zeng, Xiaoxiao Long, Lingjie Liu, Taku Komura, and Wenping Wang. 2023a. SyncDreamer: Generating Multiview-consistent Images from a Single-view Image. arXiv preprint arXiv:2309.03453 (2023)."},{"key":"e_1_3_2_2_45_1","volume-title":"Wonder3d: Single image to 3d using cross-domain diffusion. arXiv preprint arXiv:2310.15008","author":"Long Xiaoxiao","year":"2023","unstructured":"Xiaoxiao Long, Yuan-Chen Guo, Cheng Lin, Yuan Liu, Zhiyang Dou, Lingjie Liu, Yuexin Ma, Song-Hai Zhang, Marc Habermann, Christian Theobalt, 2023. Wonder3d: Single image to 3d using cross-domain diffusion. 
arXiv preprint arXiv:2310.15008 (2023)."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_2_47_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8446\u20138455","author":"Melas-Kyriazi Luke","year":"2023","unstructured":"Luke Melas-Kyriazi, Iro Laina, Christian Rupprecht, and Andrea Vedaldi. 2023. Realfusion: 360deg reconstruction of any object from a single image. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8446\u20138455."},{"key":"e_1_3_2_2_48_1","volume-title":"Face generation and editing with stylegan: A survey","author":"Melnik Andrew","year":"2024","unstructured":"Andrew Melnik, Maksim Miasayedzenkau, Dzianis Makaravets, Dzianis Pirshtuk, Eren Akbulut, Dennis Holzmann, Tarek Renusch, Gustav Reichert, and Helge Ritter. 2024. Face generation and editing with stylegan: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024)."},{"key":"e_1_3_2_2_49_1","volume-title":"Sdedit: Guided image synthesis and editing with stochastic differential equations. arXiv preprint arXiv:2108.01073","author":"Meng Chenlin","year":"2021","unstructured":"Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, and Stefano Ermon. 2021. Sdedit: Guided image synthesis and editing with stochastic differential equations. arXiv preprint arXiv:2108.01073 (2021)."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00459"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"e_1_3_2_2_52_1","volume-title":"T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453","author":"Mou Chong","year":"2023","unstructured":"Chong Mou, Xintao Wang, Liangbin Xie, Jian Zhang, Zhongang Qi, Ying Shan, and Xiaohu Qie. 2023. T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)."},{"key":"e_1_3_2_2_53_1","volume-title":"International conference on machine learning. PMLR, 7220\u20137229","author":"Nash Charlie","year":"2020","unstructured":"Charlie Nash, Yaroslav Ganin, SM\u00a0Ali Eslami, and Peter Battaglia. 2020. Polygen: An autoregressive generative model of 3d meshes. In International conference on machine learning. PMLR, 7220\u20137229."},{"key":"e_1_3_2_2_54_1","volume-title":"Construction play and cognitive skills associated with the development of mathematical abilities in 7-year-old children. Learning and instruction 32","author":"Nath Swiya","year":"2014","unstructured":"Swiya Nath and D\u00e9nes Sz\u00fccs. 2014. Construction play and cognitive skills associated with the development of mathematical abilities in 7-year-old children. Learning and instruction 32 (2014), 73\u201380."},{"key":"e_1_3_2_2_55_1","volume-title":"Point-e: A system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:2212.08751","author":"Nichol Alex","year":"2022","unstructured":"Alex Nichol, Heewoo Jun, Prafulla Dhariwal, Pamela Mishkin, and Mark Chen. 2022. Point-e: A system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:2212.08751 (2022)."},{"key":"e_1_3_2_2_56_1","volume-title":"Diffusion Handles: Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D. 
arXiv preprint arXiv:2312.02190","author":"Pandey Karran","year":"2023","unstructured":"Karran Pandey, Paul Guerrero, Matheus Gadelha, Yannick Hold-Geoffroy, Karan Singh, and Niloy Mitra. 2023. Diffusion Handles: Enabling 3D Edits for Diffusion Models by Lifting Activations to 3D. arXiv preprint arXiv:2312.02190 (2023)."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00025"},{"key":"e_1_3_2_2_58_1","volume-title":"Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988","author":"Poole Ben","year":"2022","unstructured":"Ben Poole, Ajay Jain, Jonathan\u00a0T Barron, and Ben Mildenhall. 2022. Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)."},{"key":"e_1_3_2_2_59_1","volume-title":"Magic123: One image to high-quality 3d object generation using both 2d and 3d diffusion priors. arXiv preprint arXiv:2306.17843","author":"Qian Guocheng","year":"2023","unstructured":"Guocheng Qian, Jinjie Mai, Abdullah Hamdi, Jian Ren, Aliaksandr Siarohin, Bing Li, Hsin-Ying Lee, Ivan Skorokhodov, Peter Wonka, Sergey Tulyakov, 2023. Magic123: One image to high-quality 3d object generation using both 2d and 3d diffusion priors. arXiv preprint arXiv:2306.17843 (2023)."},{"key":"e_1_3_2_2_60_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_2_61_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_2_62_1","volume-title":"Dreambooth3d: Subject-driven text-to-3d generation. arXiv preprint arXiv:2303.13508","author":"Raj Amit","year":"2023","unstructured":"Amit Raj, Srinivas Kaza, Ben Poole, Michael Niemeyer, Nataniel Ruiz, Ben Mildenhall, Shiran Zada, Kfir Aberman, Michael Rubinstein, Jonathan Barron, 2023. Dreambooth3d: Subject-driven text-to-3d generation. arXiv preprint arXiv:2303.13508 (2023)."},{"key":"e_1_3_2_2_63_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1, 2","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1, 2 (2022), 3."},{"key":"e_1_3_2_2_64_1","volume-title":"TEXTure: Text-guided texturing of 3d shapes. arXiv preprint arXiv:2302.01721","author":"Richardson Elad","year":"2023","unstructured":"Elad Richardson, Gal Metzer, Yuval Alaluf, Raja Giryes, and Daniel Cohen-Or. 2023. TEXTure: Text-guided texturing of 3d shapes. arXiv preprint arXiv:2302.01721 (2023)."},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_66_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily\u00a0L Denton, Kamyar Ghasemipour, Raphael Gontijo\u00a0Lopes, Burcu Karagol\u00a0Ayan, Tim Salimans, 2022. 
Photorealistic text-to-image diffusion models with deep language understanding. Advances in Neural Information Processing Systems 35 (2022), 36479\u201336494.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01805"},{"key":"e_1_3_2_2_68_1","volume-title":"Let 2d diffusion model know 3d-consistency for robust text-to-3d generation. arXiv preprint arXiv:2303.07937","author":"Seo Junyoung","year":"2023","unstructured":"Junyoung Seo, Wooseok Jang, Min-Seop Kwak, Jaehoon Ko, Hyeonsu Kim, Junho Kim, Jin-Hwa Kim, Jiyoung Lee, and Seungryong Kim. 2023. Let 2d diffusion model know 3d-consistency for robust text-to-3d generation. arXiv preprint arXiv:2303.07937 (2023)."},{"key":"e_1_3_2_2_69_1","first-page":"6087","article-title":"Deep marching tetrahedra: a hybrid representation for high-resolution 3d shape synthesis","volume":"34","author":"Shen Tianchang","year":"2021","unstructured":"Tianchang Shen, Jun Gao, Kangxue Yin, Ming-Yu Liu, and Sanja Fidler. 2021. Deep marching tetrahedra: a hybrid representation for high-resolution 3d shape synthesis. Advances in Neural Information Processing Systems 34 (2021), 6087\u20136101.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_70_1","volume-title":"a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110","author":"Shi Ruoxi","year":"2023","unstructured":"Ruoxi Shi, Hansheng Chen, Zhuoyang Zhang, Minghua Liu, Chao Xu, Xinyue Wei, Linghao Chen, Chong Zeng, and Hao Su. 2023a. Zero123++: a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110 (2023)."},{"key":"e_1_3_2_2_71_1","volume-title":"MVDream: Multi-view Diffusion for 3D Generation. arXiv:2308.16512","author":"Shi Yichun","year":"2023","unstructured":"Yichun Shi, Peng Wang, Jianglong Ye, Long Mai, Kejie Li, and Xiao Yang. 2023b. MVDream: Multi-view Diffusion for 3D Generation. arXiv:2308.16512 (2023)."},{"key":"e_1_3_2_2_72_1","first-page":"24487","article-title":"Epigraf: Rethinking training of 3d gans","volume":"35","author":"Skorokhodov Ivan","year":"2022","unstructured":"Ivan Skorokhodov, Sergey Tulyakov, Yiqun Wang, and Peter Wonka. 2022. Epigraf: Rethinking training of 3d gans. Advances in Neural Information Processing Systems 35 (2022), 24487\u201324501.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_73_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_2_74_1","volume-title":"Dreamgaussian: Generative gaussian splatting for efficient 3d content creation. arXiv preprint arXiv:2309.16653","author":"Tang Jiaxiang","year":"2023","unstructured":"Jiaxiang Tang, Jiawei Ren, Hang Zhou, Ziwei Liu, and Gang Zeng. 2023a. Dreamgaussian: Generative gaussian splatting for efficient 3d content creation. arXiv preprint arXiv:2309.16653 (2023)."},{"key":"e_1_3_2_2_75_1","volume-title":"Make-it-3d: High-fidelity 3d creation from a single image with diffusion prior. arXiv preprint arXiv:2303.14184","author":"Tang Junshu","year":"2023","unstructured":"Junshu Tang, Tengfei Wang, Bo Zhang, Ting Zhang, Ran Yi, Lizhuang Ma, and Dong Chen. 2023b. Make-it-3d: High-fidelity 3d creation from a single image with diffusion prior. 
arXiv preprint arXiv:2303.14184 (2023)."},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_4"},{"key":"e_1_3_2_2_78_1","volume-title":"Neus: Learning neural implicit surfaces by","author":"Wang Peng","year":"2021","unstructured":"Peng Wang, Lingjie Liu, Yuan Liu, Christian Theobalt, Taku Komura, and Wenping Wang. 2021. Neus: Learning neural implicit surfaces by volume rendering for multi-view reconstruction. arXiv preprint arXiv:2106.10689 (2021)."},{"key":"e_1_3_2_2_79_1","volume-title":"PF-LRM: Pose-Free Large Reconstruction Model for Joint Pose and Shape Prediction. arXiv preprint arXiv:2311.12024","author":"Wang Peng","year":"2023","unstructured":"Peng Wang, Hao Tan, Sai Bi, Yinghao Xu, Fujun Luan, Kalyan Sunkavalli, Wenping Wang, Zexiang Xu, and Kai Zhang. 2023b. PF-LRM: Pose-Free Large Reconstruction Model for Joint Pose and Shape Prediction. arXiv preprint arXiv:2311.12024 (2023)."},{"key":"e_1_3_2_2_80_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc\u00a0V Le, Denny Zhou, 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems 35 (2022), 24824\u201324837.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_81_1","volume-title":"Marrnet: 3d shape reconstruction via 2.5 d sketches. Advances in neural information processing systems 30","author":"Wu Jiajun","year":"2017","unstructured":"Jiajun Wu, Yifan Wang, Tianfan Xue, Xingyuan Sun, Bill Freeman, and Josh Tenenbaum. 2017. Marrnet: 3d shape reconstruction via 2.5 d sketches. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_82_1","volume-title":"arXiv preprint arXiv:2401.04092","author":"Wu Tong","year":"2024","unstructured":"Tong Wu, Guandao Yang, Zhibing Li, Kai Zhang, Ziwei Liu, Leonidas Guibas, Dahua Lin, and Gordon Wetzstein. 2024. GPT-4V (ision) is a Human-Aligned Evaluator for Text-to-3D Generation. arXiv preprint arXiv:2401.04092 (2024)."},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00278"},{"key":"e_1_3_2_2_84_1","unstructured":"Jiazheng Xu Xiao Liu Yuchen Wu Yuxuan Tong Qinkai Li Ming Ding Jie Tang and Yuxiao Dong. 2023a. ImageReward: Learning and Evaluating Human Preferences for Text-to-Image Generation. arxiv:2304.05977\u00a0[cs.CV]"},{"key":"e_1_3_2_2_85_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02003"},{"key":"e_1_3_2_2_86_1","volume-title":"Dmv3d: Denoising multi-view diffusion using 3d large reconstruction model. arXiv preprint arXiv:2311.09217","author":"Xu Yinghao","year":"2023","unstructured":"Yinghao Xu, Hao Tan, Fujun Luan, Sai Bi, Peng Wang, Jiahao Li, Zifan Shi, Kalyan Sunkavalli, Gordon Wetzstein, Zexiang Xu, 2023b. Dmv3d: Denoising multi-view diffusion using 3d large reconstruction model. 
arXiv preprint arXiv:2311.09217 (2023)."},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_34"},{"key":"e_1_3_2_2_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/VR58804.2024.00085"},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3528223.3530163","article-title":"Neural rendering in a room: amodal 3d understanding and free-viewpoint rendering for the closed scene composed of pre-captured objects","volume":"41","author":"Yang Bangbang","year":"2022","unstructured":"Bangbang Yang, Yinda Zhang, Yijin Li, Zhaopeng Cui, Sean Fanello, Hujun Bao, and Guofeng Zhang. 2022b. Neural rendering in a room: amodal 3d understanding and free-viewpoint rendering for the closed scene composed of pre-captured objects. ACM Transactions on Graphics (TOG) 41, 4 (2022), 1\u201310.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"e_1_3_2_2_90_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01352"},{"key":"e_1_3_2_2_91_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_47"},{"key":"e_1_3_2_2_92_1","volume-title":"Pushing the Limits of 3D Shape Generation at Scale. arXiv preprint arXiv:2306.11510","author":"Yu Wang","year":"2023","unstructured":"Wang Yu, Xuelin Qian, Jingyang Huo, Tiejun Huang, Bo Zhao, and Yanwei Fu. 2023. Pushing the Limits of 3D Shape Generation at Scale. arXiv preprint arXiv:2306.11510 (2023)."},{"key":"e_1_3_2_2_93_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_94_1","volume-title":"RePaint-NeRF: NeRF Editting via Semantic Masks and Diffusion Models. arXiv preprint arXiv:2306.05668","author":"Zhou Xingchen","year":"2023","unstructured":"Xingchen Zhou, Ying He, F\u00a0Richard Yu, Jianqiang Li, and You Li. 2023. RePaint-NeRF: NeRF Editting via Semantic Masks and Diffusion Models. arXiv preprint arXiv:2306.05668 (2023)."}],"event":{"name":"SIGGRAPH '24: Special Interest Group on Computer Graphics and Interactive Techniques Conference","location":"Denver CO USA","acronym":"SIGGRAPH '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657425","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3641519.3657425","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:36Z","timestamp":1750295376000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657425"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,13]]},"references-count":94,"alternative-id":["10.1145\/3641519.3657425","10.1145\/3641519"],"URL":"https:\/\/doi.org\/10.1145\/3641519.3657425","relation":{},"subject":[],"published":{"date-parts":[[2024,7,13]]},"assertion":[{"value":"2024-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
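
The record above is a standard Crossref "work" response for DOI 10.1145/3641519.3657425. As a minimal sketch (not part of the record itself), the snippet below shows how such a record can be fetched from the public Crossref REST API and how a few of the fields shown above (title, authors, issued date, reference count) can be read; it assumes the requests library is available and that the API returns the same "message" envelope as in this section.

# Sketch: fetch and read a Crossref work record (assumes the public REST API
# at api.crossref.org and the "message" envelope shown in the record above).
import requests

DOI = "10.1145/3641519.3657425"  # Coin3D, SIGGRAPH '24 (taken from the record above)

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # payload mirroring the "message" object in this section

title = work["title"][0]
authors = ", ".join(
    f'{a.get("given", "")} {a.get("family", "")}'.strip()
    for a in work.get("author", [])
)
year = work["issued"]["date-parts"][0][0]
venue = (work.get("container-title") or ["(no container title)"])[0]

print(f"{authors}. {year}. {title}. In {venue}. DOI: {work['DOI']}")
print(f"References recorded by Crossref: {work.get('references-count', 0)}")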