{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:45:27Z","timestamp":1777567527785,"version":"3.51.4"},"publisher-location":"Cham","reference-count":56,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031923869","type":"print"},{"value":"9783031923876","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-92387-6_22","type":"book-chapter","created":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:42:29Z","timestamp":1748198549000},"page":"304-320","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["LucidDreaming: Controllable Object-Centric 3D Generation"],"prefix":"10.1007","author":[{"given":"Zhaoning","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3957-7061","authenticated-orcid":false,"given":"Chen","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"22_CR1","unstructured":"Armandpour, M., Zheng, H., Sadeghian, A., Sadeghian, A., Zhou, M.: Re-imagine the negative prompt algorithm: transform 2D diffusion into 3D, alleviate janus problem and beyond. arXiv preprint arXiv:2304.04968 (2023)"},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Tancik, M., Hedman, P., Martin-Brualla, R., Srinivasan, P.P.: Mip-NeRF: a multiscale representation for anti-aliasing neural radiance fields. In: ICCV, pp. 5855\u20135864 (2021)","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Verbin, D., Srinivasan, P.P., Hedman, P.: Mip-NeRF 360: unbounded anti-aliased neural radiance fields. In: CVPR, pp. 5470\u20135479 (2022)","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"22_CR4","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3D: disentangling geometry and appearance for high-quality text-to-3D content creation. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"22_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: It3D: improved text-to-3D generation with explicit view synthesis. arXiv preprint arXiv:2308.11473 (2023)","DOI":"10.1609\/aaai.v38i2.27886"},{"key":"22_CR6","doi-asserted-by":"crossref","unstructured":"Cohen-Bar, D., Richardson, E., Metzer, G., Giryes, R., Cohen-Or, D.: Set-the-scene: global-local training for generating controllable nerf scenes. arXiv preprint arXiv:2303.13450 (2023)","DOI":"10.1109\/ICCVW60793.2023.00314"},{"key":"22_CR7","unstructured":"Fang, S., et al.: Text-driven editing of 3D scenes without retraining. arXiv preprint arXiv:2309.04917 (2023)"},{"key":"22_CR8","unstructured":"Floyd, D.: Deepfloyd if. https:\/\/github.com\/deep-floyd\/IF (2023)"},{"key":"22_CR9","unstructured":"Gokhale, T., et al.: Benchmarking spatial relationships in text-to-image generation. arXiv preprint arXiv:2212.10015 (2022)"},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Gordon, O., Avrahami, O., Lischinski, D.: Blended-NeRF: zero-shot object generation and blending in existing neural radiance fields. arXiv preprint arXiv:2306.12760 (2023)","DOI":"10.1109\/ICCVW60793.2023.00316"},{"key":"22_CR11","unstructured":"Guo, M., Fathi, A., Wu, J., Funkhouser, T.: Object-centric neural scene rendering. arXiv preprint arXiv:2012.08503 (2020)"},{"key":"22_CR12","unstructured":"Guo, Y.C., et al.: Threestudio: a unified framework for 3D content generation. https:\/\/github.com\/threestudio-project\/threestudio (2023)"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Haque, A., Tancik, M., Efros, A., Holynski, A., Kanazawa, A.: Instruct-NeRF2NeRF: editing 3D scenes with instructions. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"22_CR14","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. NeurIPS 33, 6840\u20136851 (2020)","journal-title":"NeurIPS"},{"key":"22_CR15","unstructured":"Huang, K., Sun, K., Xie, E., Li, Z., Liu, X.: T2i-compbench: a comprehensive benchmark for open-world compositional text-to-image generation. arXiv preprint arXiv:2307.06350 (2023)"},{"key":"22_CR16","unstructured":"Huang, Y., Wang, J., Shi, Y., Qi, X., Zha, Z.J., Zhang, L.: Dreamtime: an improved optimization strategy for text-to-3D content creation. arXiv preprint arXiv:2306.12422 (2023)"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Kania, K., Yi, K.M., Kowalski, M., Trzci\u0144ski, T., Tagliasacchi, A.: CoNeRF: controllable neural radiance fields. In: CVPR, pp. 18623\u201318632 (2022)","DOI":"10.1109\/CVPR52688.2022.01807"},{"key":"22_CR18","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML, pp. 12888\u201312900. PMLR (2022)"},{"key":"22_CR19","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Focaldreamer: text-driven 3D editing via focal-fusion assembly. arXiv preprint arXiv:2308.10608 (2023)","DOI":"10.1609\/aaai.v38i4.28113"},{"key":"22_CR20","unstructured":"Lian, L., Li, B., Yala, A., Darrell, T.: LLM-grounded diffusion: enhancing prompt understanding of text-to-image diffusion models with large language models. arXiv preprint arXiv:2305.13655 (2023)"},{"key":"22_CR21","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3D: high-resolution text-to-3D content creation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"22_CR22","unstructured":"Liu, M., et\u00a0al.: One-2-3-45: any single image to 3D mesh in 45 seconds without per-shape optimization. arXiv preprint arXiv:2306.16928 (2023)"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Liu, N., Li, S., Du, Y., Torralba, A., Tenenbaum, J.B.: Compositional visual generation with composable diffusion models. In: ECCV, pp. 423\u2013439. Springer (2022)","DOI":"10.1007\/978-3-031-19790-1_26"},{"key":"22_CR24","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3D object. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"22_CR25","unstructured":"Liu, Y., et al.: Syncdreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"22_CR26","doi-asserted-by":"crossref","unstructured":"Marrs, A., Shirley, P., Wald, I. (eds.): Ray Tracing Gems II: Next Generation Real-Time Rendering with DXR, Vulkan, and OptiX. Apress (2021)","DOI":"10.1007\/978-1-4842-7185-8"},{"issue":"1","key":"22_CR27","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"22_CR28","doi-asserted-by":"crossref","unstructured":"Mirzaei, A., et al.: Reference-guided controllable inpainting of neural radiance fields. arXiv preprint arXiv:2304.09677 (2023)","DOI":"10.1109\/ICCV51070.2023.01633"},{"key":"22_CR29","doi-asserted-by":"crossref","unstructured":"Mirzaei, A., Kant, Y., Kelly, J., Gilitschenski, I.: Laterf: label and text driven object radiance fields. In: ECCV, pp. 20\u201336. Springer (2022)","DOI":"10.1007\/978-3-031-20062-5_2"},{"issue":"4","key":"22_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. (ToG) 41(4), 1\u201315 (2022)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"22_CR31","unstructured":"OpenAI: Gpt-4 technical report (2023)"},{"key":"22_CR32","doi-asserted-by":"crossref","unstructured":"Po, R., Wetzstein, G.: Compositional 3D scene generation using locally conditioned diffusion. arXiv preprint arXiv:2303.12218 (2023)","DOI":"10.1109\/3DV62453.2024.00026"},{"key":"22_CR33","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3D using 2D diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"22_CR34","unstructured":"Qian, G., et\u00a0al.: Magic123: one image to high-quality 3D object generation using both 2D and 3D diffusion priors. arXiv preprint arXiv:2306.17843 (2023)"},{"key":"22_CR35","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML, pp. 8748\u20138763. PMLR (2021)"},{"key":"22_CR36","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"22_CR37","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"22_CR38","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: CVPR, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"22_CR39","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S.E.A.: Photorealistic text-to-image diffusion models with deep language understanding. NeurIPS 35, 36479\u201336494 (2022)","journal-title":"NeurIPS"},{"key":"22_CR40","doi-asserted-by":"crossref","unstructured":"Sella, E., Fiebelman, G., Hedman, P., Averbuch-Elor, H.: Vox-e: text-guided voxel editing of 3D objects. In: ICCV, pp. 430\u2013440 (2023)","DOI":"10.1109\/ICCV51070.2023.00046"},{"key":"22_CR41","unstructured":"Shao, R., et al.: Control4D: dynamic portrait editing by learning 4D GAN from 2D diffusion-based editor. arXiv preprint arXiv:2305.20082 (2023)"},{"key":"22_CR42","unstructured":"Shi, Y., Wang, P., Ye, J., Long, M., Li, K., Yang, X.: MVDream: multi-view diffusion for 3D generation. arXiv preprint arXiv:2308.16512 (2023)"},{"key":"22_CR43","doi-asserted-by":"crossref","unstructured":"Shum, K.C., Kim, J., Hua, B.S., Nguyen, D.T., Yeung, S.K.: Language-driven object fusion into neural radiance fields with pose-conditioned dataset updates. arXiv preprint arXiv:2309.11281 (2023)","DOI":"10.1109\/CVPR52733.2024.00495"},{"key":"22_CR44","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"22_CR45","doi-asserted-by":"crossref","unstructured":"Tsalicoglou, C., Manhardt, F., Tonioni, A., Niemeyer, M., Tombari, F.: Textmesh: generation of realistic 3D meshes from text prompts. arXiv preprint arXiv:2304.12439 (2023)","DOI":"10.1109\/3DV62453.2024.00154"},{"key":"22_CR46","doi-asserted-by":"crossref","unstructured":"Wang, D., Zhang, T., Abboud, A., S\u00fcsstrunk, S.: Inpaintnerf360: text-guided 3D inpainting on unbounded neural radiance fields. arXiv preprint arXiv:2305.15094 (2023)","DOI":"10.1109\/CVPR52733.2024.01205"},{"key":"22_CR47","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score jacobian chaining: lifting pretrained 2D diffusion models for 3D generation. In: CVPR, pp. 12619\u201312629 (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"22_CR48","unstructured":"Wang, Z., et al.: Prolificdreamer: high-fidelity and diverse text-to-3D generation with variational score distillation. arXiv preprint arXiv:2305.16213 (2023)"},{"key":"22_CR49","doi-asserted-by":"crossref","unstructured":"Xie, J., et al.: Boxdiff: text-to-image synthesis with training-free box-constrained diffusion. In: ICCV, pp. 7452\u20137461 (2023)","DOI":"10.1109\/ICCV51070.2023.00685"},{"key":"22_CR50","doi-asserted-by":"crossref","unstructured":"Yang, B., et al.: Neumesh: learning disentangled neural mesh-based implicit field for geometry and texture editing. In: ECCV, pp. 597\u2013614. Springer (2022)","DOI":"10.1007\/978-3-031-19787-1_34"},{"key":"22_CR51","doi-asserted-by":"crossref","unstructured":"Yang, B., et al.: Learning object-compositional neural radiance field for editable scene rendering. In: ICCV, pp. 13779\u201313788 (2021)","DOI":"10.1109\/ICCV48922.2021.01352"},{"key":"22_CR52","doi-asserted-by":"crossref","unstructured":"Ye, J., Wang, P., Li, K., Shi, Y., Wang, H.: Consistent-1-to-3: consistent image to 3D view synthesis via geometry-aware diffusion models. arXiv preprint arXiv:2310.03020 (2023)","DOI":"10.1109\/3DV62453.2024.00027"},{"key":"22_CR53","doi-asserted-by":"crossref","unstructured":"Yuan, Y.J., Sun, Y.T., Lai, Y.K., Ma, Y., Jia, R., Gao, L.: NeRF-editing: geometry editing of neural radiance fields. In: CVPR, pp. 18353\u201318364 (2022)","DOI":"10.1109\/CVPR52688.2022.01781"},{"key":"22_CR54","unstructured":"Zhang, T., Zhang, Y., Vineet, V., Joshi, N., Wang, X.: Controllable text-to-image generation with GPT-4. arXiv preprint arXiv:2305.18583 (2023)"},{"key":"22_CR55","unstructured":"Zhu, J., Zhuang, P.: Hifa: high-fidelity text-to-3D with advanced diffusion guidance. arXiv preprint arXiv:2305.18766 (2023)"},{"key":"22_CR56","doi-asserted-by":"crossref","unstructured":"Zhuang, J., Wang, C., Liu, L., Lin, L., Li, G.: Dreameditor: text-driven 3D scene editing with neural fields. arXiv preprint arXiv:2306.13455 (2023)","DOI":"10.1145\/3610548.3618190"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-92387-6_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:42:41Z","timestamp":1748198561000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-92387-6_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031923869","9783031923876"],"references-count":56,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-92387-6_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}