{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:32:58Z","timestamp":1778081578601,"version":"3.51.4"},"publisher-location":"Cham","reference-count":71,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732539","type":"print"},{"value":"9783031732546","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T00:00:00Z","timestamp":1732752000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T00:00:00Z","timestamp":1732752000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73254-6_8","type":"book-chapter","created":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T07:22:21Z","timestamp":1732692141000},"page":"124-141","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["DreamDissector: Learning Disentangled Text-to-3D Generation from\u00a02D Diffusion Priors"],"prefix":"10.1007","author":[{"given":"Zizheng","family":"Yan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiapeng","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fanpeng","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yushuang","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lingteng","family":"Qiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zisheng","family":"Ye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuguang","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanying","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoguang","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,28]]},"reference":[{"key":"8_CR1","unstructured":"Achlioptas, P., Diamanti, O., Mitliagkas, I., Guibas, L.: Learning representations and generative models for 3D point clouds. In: ICML (2018)"},{"key":"8_CR2","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Aberman, K., Fried, O., Cohen-Or, D., Lischinski, D.: Break-a-scene: extracting multiple concepts from a single image. arXiv preprint arXiv:2305.16311 (2023)","DOI":"10.1145\/3610548.3618154"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Bar-Tal, O., Ofri-Amar, D., Fridman, R., Kasten, Y., Dekel, T.: Text2live: text-driven layered image and video editing. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19784-0_41"},{"key":"8_CR4","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441","volume-title":"Convex Optimization","author":"SP Boyd","year":"2004","unstructured":"Boyd, S.P., Vandenberghe, L.: Convex Optimization. Cambridge University Press, Cambridge (2004)"},{"key":"8_CR5","unstructured":"Cen, J., et al.: Segment anything in 3D with NeRFs. arXiv preprint arXiv:2304.12308 (2023)"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Chen, D.Z., Siddiqui, Y., Lee, H.Y., Tulyakov, S., Nie\u00dfner, M.: Text2tex: text-driven texture synthesis via diffusion models. arXiv preprint arXiv:2303.11396 (2023)","DOI":"10.1109\/ICCV51070.2023.01701"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3D: disentangling geometry and appearance for high-quality text-to-3D content creation (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Z., Zhang, H.: Learning implicit fields for generative shape modeling. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00609"},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Drebin, R.A., Carpenter, L., Hanrahan, P.: Volume rendering. In: SIGGRAPH (1988)","DOI":"10.1145\/54852.378484"},{"key":"8_CR10","unstructured":"Epstein, D., Poole, B., Mildenhall, B., Efros, A.A., Holynski, A.: Disentangled 3D scene generation with layout learning. arXiv preprint arXiv:2402.16936 (2024)"},{"key":"8_CR11","unstructured":"Fridman, R., Abecasis, A., Kasten, Y., Dekel, T.: Scenescape: text-driven consistent scene generation (2023)"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Gao, G., Liu, W., Chen, A., Geiger, A., Sch\u00f6lkopf, B.: Graphdreamer: compositional 3D scene synthesis from scene graphs. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.02012"},{"key":"8_CR13","unstructured":"Gao, J., et al.: Get3D: a generative model of high quality 3D textured shapes learned from images. In: NeurIPS (2022)"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Henzler, P., Mitra, N.J., Ritschel, T.: Escaping Plato\u2019s cave: 3D shape from adversarial rendering. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.01008"},{"key":"8_CR15","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: NeurIPS (2020)"},{"key":"8_CR16","doi-asserted-by":"crossref","unstructured":"Hong, F., Zhang, M., Pan, L., Cai, Z., Yang, L., Liu, Z.: Avatarclip: zero-shot text-driven generation and animation of 3D avatars. In: SIGGRAPH (2022)","DOI":"10.1145\/3528223.3530094"},{"key":"8_CR17","unstructured":"Hong, S., Ahn, D., Kim, S.: Debiasing scores and prompts of 2D diffusion for view-consistent text-to-3D generation (2023)"},{"key":"8_CR18","unstructured":"Hong, Y., et al.: 3D-LLM: injecting the 3D world into large language models (2023)"},{"key":"8_CR19","unstructured":"Huang, Y., Wang, J., Shi, Y., Qi, X., Zha, Z.J., Zhang, L.: Dreamtime: an improved optimization strategy for text-to-3D content creation (2023)"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"H\u00f6llein, L., Cao, A., Owens, A., Johnson, J., Nie\u00dfner, M.: Text2room: extracting textured 3D meshes from 2D text-to-image models (2023)","DOI":"10.1109\/ICCV51070.2023.00727"},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Jain, A., Mildenhall, B., Barron, J.T., Abbeel, P., Poole, B.: Zero-shot text-guided object generation with dream fields. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00094"},{"key":"8_CR22","unstructured":"Khalid, N.M., Xie, T., Belilovsky, E., Popa, T.: CLIP-mesh: generating textured meshes from text using pretrained image-text models. In: SIGGRAPH Asia (2022)"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"8_CR24","unstructured":"Lee, H.H., Chang, A.X.: Understanding pure clip guidance for voxel grid NeRF models (2022)"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3D: high-resolution text-to-3D content creation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"8_CR26","unstructured":"Lin, Y., et al.: CompoNeRF: text-guided multi-object compositional NeRF with editable 3D scene layout. arXiv preprint arXiv:2303.13843 (2023)"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Hoorick, B.V., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3D object. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"8_CR28","unstructured":"Liu, Y., et al.: Syncdreamer: generating multiview-consistent images from a single-view image (2023)"},{"key":"8_CR29","unstructured":"Lunz, S., Li, Y., Fitzgibbon, A., Kushman, N.: Inverse graphics GAN: learning to generate 3D shapes from unstructured 2D data. arXiv preprint arXiv:2002.12674 (2020)"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Luo, S., Hu, W.: Diffusion probabilistic models for 3D point cloud generation. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00286"},{"key":"8_CR31","doi-asserted-by":"crossref","unstructured":"Melas-Kyriazi, L., Rupprecht, C., Laina, I., Vedaldi, A.: Realfusion: 360deg reconstruction of any object from a single image. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00816"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Mescheder, L., Oechsle, M., Niemeyer, M., Nowozin, S., Geiger, A.: Occupancy networks: learning 3D reconstruction in function space. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00459"},{"key":"8_CR33","doi-asserted-by":"crossref","unstructured":"Metzer, G., Richardson, E., Patashnik, O., Giryes, R., Cohen-Or, D.: Latent-NeRF for shape-guided generation of 3D shapes and textures. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"8_CR34","doi-asserted-by":"crossref","unstructured":"Michel, O., Bar-On, R., Liu, R., Benaim, S., Hanocka, R.: Text2mesh: text-driven neural stylization for meshes. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01313"},{"key":"8_CR35","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM (2021)","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"8_CR36","doi-asserted-by":"crossref","unstructured":"Mo, K., et al.: Structurenet: hierarchical graph networks for 3D shape generation. arXiv preprint arXiv:1908.00575 (2019)","DOI":"10.1145\/3355089.3356527"},{"key":"8_CR37","doi-asserted-by":"crossref","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. TOG (2022)","DOI":"10.1145\/3528223.3530127"},{"key":"8_CR38","unstructured":"Nichol, A., Jun, H., Dhariwal, P., Mishkin, P., Chen, M.: Point-e: a system for generating 3D point clouds from complex prompts. arXiv preprint arXiv:2212.08751 (2022)"},{"key":"8_CR39","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Geiger, A.: Giraffe: representing scenes as compositional generative neural feature fields. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01129"},{"key":"8_CR40","doi-asserted-by":"crossref","unstructured":"Po, R., Wetzstein, G.: Compositional 3D scene generation using locally conditioned diffusion. arXiv preprint arXiv:2303.12218 (2023)","DOI":"10.1109\/3DV62453.2024.00026"},{"key":"8_CR41","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3D using 2D diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"8_CR42","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"key":"8_CR43","doi-asserted-by":"crossref","unstructured":"Raj, A., et al.: Dreambooth3D: subject-driven text-to-3D generation (2023)","DOI":"10.1109\/ICCV51070.2023.00223"},{"key":"8_CR44","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. 3DVAR (2022)"},{"key":"8_CR45","unstructured":"Ren, T., et\u00a0al.: Grounded SAM: assembling open-world models for diverse visual tasks. arXiv preprint arXiv:2401.14159 (2024)"},{"key":"8_CR46","doi-asserted-by":"crossref","unstructured":"Richardson, E., Metzer, G., Alaluf, Y., Giryes, R., Cohen-Or, D.: Texture: text-guided texturing of 3D shapes. arXiv preprint arXiv:2302.01721 (2023)","DOI":"10.1145\/3588432.3591503"},{"key":"8_CR47","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"8_CR48","unstructured":"Saharia, C., et\u00a0al.: Photorealistic text-to-image diffusion models with deep language understanding. In: NeurIPS (2022)"},{"key":"8_CR49","unstructured":"Schuhmann, C., et\u00a0al.: Laion-5b: an open large-scale dataset for training next generation image-text models. In: NeurIPS (2022)"},{"key":"8_CR50","unstructured":"Schuhmann, C., et al.: Laion-400m: open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114 (2021)"},{"key":"8_CR51","unstructured":"Seo, J., et al.: Let 2D diffusion model know 3D-consistency for robust text-to-3D generation (2023)"},{"key":"8_CR52","unstructured":"Shen, T., Gao, J., Yin, K., Liu, M.Y., Fidler, S.: Deep marching tetrahedra: a hybrid representation for high-resolution 3D shape synthesis. In: NeurIPS (2021)"},{"key":"8_CR53","unstructured":"Shi, Y., Wang, P., Ye, J., Long, M., Li, K., Yang, X.: MVDream: multi-view diffusion for 3D generation (2023)"},{"key":"8_CR54","unstructured":"Smith, E.J., Meger, D.: Improved adversarial systems for 3D object generation and reconstruction. In: CoRL (2017)"},{"key":"8_CR55","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. In: NeurIPS (2019)"},{"key":"8_CR56","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score Jacobian chaining: lifting pretrained 2D diffusion models for 3D generation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"8_CR57","doi-asserted-by":"crossref","unstructured":"Wang, T., et al.: Rodin: a generative model for sculpting 3d digital avatars using diffusion. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00443"},{"key":"8_CR58","unstructured":"Wang, T., et al.: Pretraining is all you need for image-to-image translation (2022)"},{"key":"8_CR59","unstructured":"Wang, Z., et al.: Prolificdreamer: high-fidelity and diverse text-to-3D generation with variational score distillation (2023)"},{"key":"8_CR60","unstructured":"Wu, J., Zhang, C., Xue, T., Freeman, B., Tenenbaum, J.: Learning a probabilistic latent space of object shapes via 3D generative-adversarial modeling. In: NeurIPS (2016)"},{"key":"8_CR61","doi-asserted-by":"crossref","unstructured":"Wu, Y., et al.: SCoDA: domain adaptive shape completion for real scans. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01691"},{"key":"8_CR62","doi-asserted-by":"crossref","unstructured":"Xu, Z., Chen, Z., Zhang, Y., Song, Y., Wan, X., Li, G.: Bridging vision and language encoders: parameter-efficient tuning for referring image segmentation. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01605"},{"key":"8_CR63","doi-asserted-by":"crossref","unstructured":"Yang, G., Huang, X., Hao, Z., Liu, M.Y., Belongie, S., Hariharan, B.: Pointflow: 3D point cloud generation with continuous normalizing flows. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00464"},{"key":"8_CR64","doi-asserted-by":"crossref","unstructured":"Yang, J., et al.: LLM-grounder: open-vocabulary 3d visual grounding with large language model as an agent (2023)","DOI":"10.1109\/ICRA57147.2024.10610443"},{"key":"8_CR65","doi-asserted-by":"crossref","unstructured":"Yu, X., et\u00a0al.: MVImgNet: a large-scale dataset of multi-view images. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00883"},{"key":"8_CR66","unstructured":"Zeng, X., et al.: Lion: latent point diffusion models for 3D shape generation. In: NeurIPS (2022)"},{"key":"8_CR67","doi-asserted-by":"crossref","unstructured":"Zhang, J., Li, X., Wan, Z., Wang, C., Liao, J.: Text2NeRF: text-driven 3D scene generation with neural radiance fields (2023)","DOI":"10.1145\/3610548.3618190"},{"key":"8_CR68","unstructured":"Zhang, X., Zhao, W., Lu, X., Chien, J.: Text2layer: layered image generation using latent diffusion model. arXiv preprint arXiv:2307.09781 (2023)"},{"key":"8_CR69","unstructured":"Zhang, Y., et al.: Image GANs meet differentiable rendering for inverse graphics and interpretable 3D neural rendering. arXiv preprint arXiv:2010.09125 (2020)"},{"key":"8_CR70","doi-asserted-by":"crossref","unstructured":"Zhou, L., Du, Y., Wu, J.: 3D shape generation and completion through point-voxel diffusion. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00577"},{"key":"8_CR71","unstructured":"Zhou, X., et al.: Gala3D: towards text-to-3D complex scene generation via layout-guided generative Gaussian splatting. arXiv preprint arXiv:2402.07207 (2024)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73254-6_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T08:05:20Z","timestamp":1732694720000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73254-6_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,28]]},"ISBN":["9783031732539","9783031732546"],"references-count":71,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73254-6_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,28]]},"assertion":[{"value":"28 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}