{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T16:44:44Z","timestamp":1779295484553,"version":"3.51.4"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730122","type":"print"},{"value":"9783031730139","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:00:00Z","timestamp":1732665600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:00:00Z","timestamp":1732665600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73013-9_21","type":"book-chapter","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T07:47:36Z","timestamp":1732607256000},"page":"362-378","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["SceneTeller: Language-to-3D Scene Generation"],"prefix":"10.1007","author":[{"given":"Ba\u015fak Melis","family":"\u00d6cal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Maxim","family":"Tatarchenko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sezer","family":"Karao\u011flu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Theo","family":"Gevers","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,27]]},"reference":[{"key":"21_CR1","unstructured":"Achiam, J., et\u00a0al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"21_CR2","unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 35, pp. 23716\u201323736 (2022)"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Tancik, M., Hedman, P., Martin-Brualla, R., Srinivasan, P.P.: MIP-NeRF: a multiscale representation for anti-aliasing neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (CVPR), pp. 5855\u20135864 (2021)","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Chen, A., Xu, Z., Geiger, A., Yu, J., Su, H.: Tensorf: tensorial radiance fields. In: European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-19824-3_20"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, Y., Jiao, N., Jia, K.: Fantasia3D: disentangling geometry and appearance for high-quality text-to-3D content creation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Hallucinated neural radiance fields in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12943\u201312952 (2022)","DOI":"10.1109\/CVPR52688.2022.01260"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: Gaussianeditor: swift and controllable 3D editing with gaussian splatting. arXiv preprint arXiv:2311.14521 (2023)","DOI":"10.1109\/CVPR52733.2024.02029"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, F., Liu, H.: Text-to-3D using Gaussian splatting. arXiv preprint arXiv:2309.16585 (2023)","DOI":"10.1109\/CVPR52733.2024.02022"},{"key":"21_CR10","unstructured":"Chung, J., Lee, S., Nam, H., Lee, J., Lee, K.M.: Luciddreamer: domain-free generation of 3D Gaussian splatting scenes. arXiv preprint arXiv:2311.13384 (2023)"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Cohen-Bar, D., Richardson, E., Metzer, G., Giryes, R., Cohen-Or, D.: Set-the-scene: global-local training for generating controllable nerf scenes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops, pp. 2920\u20132929 (2023)","DOI":"10.1109\/ICCVW60793.2023.00314"},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: Scannet: richly-annotated 3D reconstructions of indoor scenes. In: Proceedings of the Computer Vision and Pattern Recognition (CVPR). IEEE (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Fang, J., Wang, J., Zhang, X., Xie, L., Tian, Q.: Gaussianeditor: editing 3D Gaussians delicately with text instructions. arXiv preprint arXiv:2311.16037 (2023)","DOI":"10.1109\/CVPR52733.2024.01975"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Fang, J., Wang, J., Zhang, X., Xie, L., Tian, Q.: Gaussianeditor: editing 3D Gaussians delicately with text instructions. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.01975"},{"key":"21_CR15","unstructured":"Feng, W., et al.: LayoutGPT: compositional visual planning and generation with large language models. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 36 (2024)"},{"key":"21_CR16","unstructured":"Fridman, R., Abecasis, A., Kasten, Y., Dekel, T.: Scenescape: text-driven consistent scene generation. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 36 (2024)"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"Fridovich-Keil, S., Yu, A., Tancik, M., Chen, Q., Recht, B., Kanazawa, A.: Plenoxels: radiance fields without neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5501\u20135510 (2022)","DOI":"10.1109\/CVPR52688.2022.00542"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Fu, H., et\u00a0al.: 3D-front: 3D furnished rooms with layouts and semantics. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10933\u201310942 (2021)","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"21_CR19","doi-asserted-by":"publisher","first-page":"3313","DOI":"10.1007\/s11263-021-01534-z","volume":"129","author":"H Fu","year":"2021","unstructured":"Fu, H., Jia, R., Gao, L., Gong, M., Zhao, B., Maybank, S., Tao, D.: 3D-future: 3D furniture shape with texture. Int. J. Comput. Vis. 129, 3313\u20133337 (2021)","journal-title":"Int. J. Comput. Vis."},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Haque, A., Tancik, M., Efros, A., Holynski, A., Kanazawa, A.: Instruct-nerf2nerf: editing 3D scenes with instructions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (CVPR) (2023)","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"21_CR21","doi-asserted-by":"publisher","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Le\u00a0Bras, R., Choi, Y.: CLIPScore: a reference-free evaluation metric for image captioning. In: Moens, M.F., Huang, X., Specia, L., Yih, S.W.t. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 7514\u20137528. Association for Computational Linguistics, Online and Punta Cana, Dominican Republic (2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.595. https:\/\/aclanthology.org\/2021.emnlp-main.595","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"21_CR22","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"H\u00f6llein, L., Cao, A., Owens, A., Johnson, J., Nie\u00dfner, M.: Text2room: extracting textured 3D meshes from 2D text-to-image models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7909\u20137920 (2023)","DOI":"10.1109\/ICCV51070.2023.00727"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D Gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42(4) (2023). https:\/\/repo-sam.inria.fr\/fungraph\/3d-gaussian-splatting\/","DOI":"10.1145\/3592433"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Li, Q., Li, F., Guo, J., Guo, Y.: UHDNeRF: ultra-high-definition neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (CVPR), pp. 23097\u201323108 (2023)","DOI":"10.1109\/ICCV51070.2023.02111"},{"key":"21_CR26","unstructured":"Li, X., Wang, H., Tseng, K.K.: Gaussiandiffusion: 3D Gaussian splatting for denoising diffusion probabilistic models with structured noise. arXiv preprint arXiv:2311.11221 (2023)"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Lin, C.H., et al.: Magic3D: high-resolution text-to-3D content creation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 300\u2013309 (2023)","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"21_CR28","unstructured":"Lin, Y., et al.: Componerf: text-guided multi-object compositional nerf with editable 3D scene layout. arXiv preprint arXiv:2303.13843 (2023)"},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Lorraine, J., et al.: Att3d: amortized text-to-3D object synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 17946\u201317956 (2023)","DOI":"10.1109\/ICCV51070.2023.01645"},{"key":"21_CR30","doi-asserted-by":"crossref","unstructured":"Metzer, G., Richardson, E., Patashnik, O., Giryes, R., Cohen-Or, D.: Latent-nerf for shape-guided generation of 3D shapes and textures. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12663\u201312673 (2023)","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. In: Proceedings of the European Conference on Computer Vision (ECCV) (2020)","DOI":"10.1007\/978-3-030-58452-8_24"},{"issue":"4","key":"21_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. (ToG) 41(4), 1\u201315 (2022)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"21_CR33","unstructured":"Paschalidou, D., Kar, A., Shugrina, M., Kreis, K., Geiger, A., Fidler, S.: ATISS: autoregressive transformers for indoor scene synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 12013\u201312026 (2021)"},{"key":"21_CR34","doi-asserted-by":"crossref","unstructured":"Phung, Q., Ge, S., Huang, J.B.: Grounded text-to-image synthesis with attention refocusing. arXiv preprint arXiv:2306.05427 (2023)","DOI":"10.1109\/CVPR52733.2024.00758"},{"key":"21_CR35","unstructured":"Po, R., Wetzstein, G.: Compositional 3D scene generation using locally conditioned diffusion. arXiv abs\/2303.12218 (2023). https:\/\/api.semanticscholar.org\/CorpusID:257663283"},{"key":"21_CR36","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3D using 2D diffusion. In: The Eleventh International Conference on Learning Representations (ICLR) (2023). https:\/\/openreview.net\/forum?id=FjNys5c7VyY"},{"key":"21_CR37","unstructured":"Qian, G., et al.: Magic123: one image to high-quality 3d object generation using both 2D and 3D diffusion priors. In: The Twelfth International Conference on Learning Representations (ICLR) (2024). https:\/\/openreview.net\/forum?id=0jHkUDyEO9"},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Raj, A., et al.: Dreambooth3D: subject-driven text-to-3D generation. In: The International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.00223"},{"issue":"4","key":"21_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592426","volume":"42","author":"C Reiser","year":"2023","unstructured":"Reiser, C., et al.: MERF: memory-efficient radiance fields for real-time view synthesis in unbounded scenes. ACM Trans. Graph. (TOG) 42(4), 1\u201312 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"21_CR40","unstructured":"Ren, J., et al.: Make-a-character: high quality text-to-3D character generation within minutes. arXiv preprint arXiv:2312.15430 (2023)"},{"key":"21_CR41","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"21_CR42","unstructured":"Sanh, V., et al.: Multitask prompted training enables zero-shot task generalization. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=9Vrb9D0WI4"},{"key":"21_CR43","doi-asserted-by":"crossref","unstructured":"Sun, C., Sun, M., Chen, H.T.: Direct voxel grid optimization: super-fast convergence for radiance fields reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5459\u20135469 (2022)","DOI":"10.1109\/CVPR52688.2022.00538"},{"key":"21_CR44","unstructured":"Sun, C., Han, J., Deng, W., Wang, X., Qin, Z., Gould, S.: 3D-GPT: procedural 3D modeling with large language models. arXiv preprint arXiv:2310.12945 (2023)"},{"key":"21_CR45","doi-asserted-by":"crossref","unstructured":"Tancik, M., et al.: Block-nerf: scalable large scene neural view synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8248\u20138258 (2022)","DOI":"10.1109\/CVPR52688.2022.00807"},{"key":"21_CR46","doi-asserted-by":"crossref","unstructured":"Tancik, M., et al.: Nerfstudio: a modular framework for neural radiance field development. In: ACM SIGGRAPH 2023 Conference Proceedings. SIGGRAPH 2023 (2023)","DOI":"10.1145\/3588432.3591516"},{"key":"21_CR47","unstructured":"Tang, J., Ren, J., Zhou, H., Liu, Z., Zeng, G.: Dreamgaussian: generative gaussian splatting for efficient 3D content creation. arXiv preprint arXiv:2309.16653 (2023)"},{"key":"21_CR48","unstructured":"Vachha, C., Haque, A.: Instruct-gs2gs: editing 3D Gaussian splats with instructions (2024). https:\/\/instruct-gs2gs.github.io\/"},{"key":"21_CR49","doi-asserted-by":"crossref","unstructured":"Wang, H., Du, X., Li, J., Yeh, R.A., Shakhnarovich, G.: Score Jacobian chaining: lifting pretrained 2D diffusion models for 3D generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12619\u201312629 (2023)","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"21_CR50","unstructured":"Wang, Z., et al.: Prolificdreamer: high-fidelity and diverse text-to-3D generation with variational score distillation. In: Advances in Neural Information Processing Systems (NeurIPS) (2023)"},{"key":"21_CR51","unstructured":"Wei, J., et al.: Finetuned language models are zero-shot learners. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=gEZrGCozdqR"},{"key":"21_CR52","doi-asserted-by":"crossref","unstructured":"Xu, Y., et\u00a0al.: Discoscene: spatially disentangled generative radiance fields for controllable 3D-aware scene synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4402\u20134412 (2023)","DOI":"10.1109\/CVPR52729.2023.00428"},{"key":"21_CR53","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: An empirical study of GPT-3 for few-shot knowledge-based VQA. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), vol.\u00a036, pp. 3081\u20133089 (2022)","DOI":"10.1609\/aaai.v36i3.20215"},{"key":"21_CR54","unstructured":"Yi, T., et al.: Gaussiandreamer: fast generation from text to 3d gaussian splatting with point cloud priors. arXiv preprint arXiv:2310.08529 (2023)"},{"key":"21_CR55","doi-asserted-by":"crossref","unstructured":"Yu, A., Li, R., Tancik, M., Li, H., Ng, R., Kanazawa, A.: Plenoctrees for real-time rendering of neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (CVPR), pp. 5752\u20135761 (2021)","DOI":"10.1109\/ICCV48922.2021.00570"},{"key":"21_CR56","unstructured":"Zhou, X., et al.: Gala3D: towards text-to-3D complex scene generation via layout-guided generative Gaussian splatting. arXiv preprint arXiv:2402.07207 (2024)"},{"key":"21_CR57","unstructured":"Zhu, J., Zhuang, P., Koyejo, S.: HIFA: high-fidelity text-to-3D generation with advanced diffusion guidance. In: The Twelfth International Conference on Learning Representations (ICLR) (2024). https:\/\/openreview.net\/forum?id=IZMPWmcS3H"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73013-9_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T08:31:56Z","timestamp":1732609916000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73013-9_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,27]]},"ISBN":["9783031730122","9783031730139"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73013-9_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,27]]},"assertion":[{"value":"27 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}