{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T16:45:01Z","timestamp":1779295501645,"version":"3.51.4"},"publisher-location":"Cham","reference-count":53,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031727603","type":"print"},{"value":"9783031727610","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72761-0_11","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"184-201","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Structured-NeRF: Hierarchical Scene Graph with\u00a0Neural Representation"],"prefix":"10.1007","author":[{"given":"Zhide","family":"Zhong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiakai","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Songen","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sirui","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liyi","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guyue","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zike","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"11_CR1","unstructured":"Achiam, J., et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Tancik, M., Hedman, P., Martin-Brualla, R., Srinivasan, P.P.: Mip-nerf: a multiscale representation for anti-aliasing neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5855\u20135864 (2021)","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"11_CR3","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Verbin, D., Srinivasan, P.P., Hedman, P.: Mip-nerf 360: unbounded anti-aliased neural radiance fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5470\u20135479 (2022)","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"11_CR4","unstructured":"Bing, W., Chen, L., Yang, B.: Dm-nerf: 3d scene geometry decomposition and manipulation from 2d images. In: ICLR (2023)"},{"key":"11_CR5","doi-asserted-by":"crossref","unstructured":"Cao, C., Cai, Y., Dong, Q., Wang, Y., Fu, Y.: Leftrefill: filling right canvas based on left reference through generalized text-to-image diffusion model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2024)","DOI":"10.1109\/CVPR52733.2024.00736"},{"key":"11_CR6","unstructured":"Chang, H., et\u00a0al.: Context-aware entity grounding with open-vocabulary 3d scene graphs. arXiv preprint arXiv:2309.15940 (2023)"},{"key":"11_CR7","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1007\/978-3-031-19815-1_37","volume-title":"ECCV 2022","author":"HK Cheng","year":"2022","unstructured":"Cheng, H.K., Schwing, A.G.: Xmem: Long-term video object segmentation with an atkinson-shiffrin memory model. In: Avidan, S., Brostow, G., Cisse, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, pp. 640\u2013658. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_37"},{"key":"11_CR8","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: Scannet: richly-annotated 3d reconstructions of indoor scenes. In: CVPR, pp. 5828\u20135839 (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Eftekhar, A., Sax, A., Malik, J., Zamir, A.: Omnidata: a scalable pipeline for making multi-task mid-level vision datasets from 3d scans. In: ICCV, pp. 10786\u201310796 (2021)","DOI":"10.1109\/ICCV48922.2021.01061"},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Gu, Q., et al.: Conceptgraphs: open-vocabulary 3d scene graphs for perception and planning (2023)","DOI":"10.1109\/ICRA57147.2024.10610243"},{"key":"11_CR11","doi-asserted-by":"crossref","unstructured":"Han, X., Liu, H., Ding, Y., Yang, L.: Ro-map: real-time multi-object mapping with neural radiance fields (2023)","DOI":"10.1109\/LRA.2023.3302176"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"Haque, A., Tancik, M., Efros, A., Holynski, A., Kanazawa, A.: Instruct-nerf2nerf: editing 3d scenes with instructions. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"11_CR13","doi-asserted-by":"crossref","unstructured":"Heller, G., Fetaya, E.: Can stochastic gradient langevin dynamics provide differential privacy for deep learning? (2023)","DOI":"10.1109\/SaTML54575.2023.00015"},{"key":"11_CR14","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"11_CR15","doi-asserted-by":"crossref","unstructured":"Jambon, C., Kerbl, B., Kopanas, G., Diolatzis, S., Drettakis, G., Leimk\u00fchler, T.: Nerfshop: interactive editing of neural radiance fields, vol.\u00a06 (2023)","DOI":"10.1145\/3585499"},{"key":"11_CR16","doi-asserted-by":"publisher","first-page":"3956","DOI":"10.1109\/LRA.2023.3272516","volume":"8","author":"I Kapelyukh","year":"2023","unstructured":"Kapelyukh, I., Vosylius, V., Johns, E.: Dall-e-bot: introducing web-scale diffusion models to robotics. IEEE Rob. Autom. Lett. 8, 3956\u20133963 (2023)","journal-title":"IEEE Rob. Autom. Lett."},{"key":"11_CR17","doi-asserted-by":"crossref","unstructured":"Kerr, J., Kim, C.M., Goldberg, K., Kanazawa, A., Tancik, M.: Lerf: language embedded radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19729\u201319739 (2023)","DOI":"10.1109\/ICCV51070.2023.01807"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Kong, X., Liu, S., Taher, M., Davison, A.J.: vmap: vectorised object mapping for neural field slam. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 952\u2013961 (2023)","DOI":"10.1109\/CVPR52729.2023.00098"},{"key":"11_CR19","doi-asserted-by":"crossref","unstructured":"Kong, X., Liu, S., Taher, M., Davison, A.J.: vmap: vectorised object mapping for neural field slam. In: CVPR, pp. 952\u2013961 (2023)","DOI":"10.1109\/CVPR52729.2023.00098"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"Kundu, A., et al.: Panoptic neural fields: a semantic object-aware neural scene representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12871\u201312881 (2022)","DOI":"10.1109\/CVPR52688.2022.01253"},{"issue":"5","key":"11_CR21","doi-asserted-by":"publisher","first-page":"2780","DOI":"10.1109\/LRA.2023.3257707","volume":"8","author":"S Le Cleac\u2019h","year":"2023","unstructured":"Le Cleac\u2019h, S., et al.: Differentiable physics simulation of dynamics-augmented neural objects. IEEE Rob. Autom. Lett. 8(5), 2780\u20132787 (2023). https:\/\/doi.org\/10.1109\/LRA.2023.3257707","journal-title":"IEEE Rob. Autom. Lett."},{"key":"11_CR22","unstructured":"Liu, H.K., Shen, I., Chen, B.Y., et\u00a0al.: Nerf-in: free-form nerf inpainting with rgb-d priors. arXiv preprint arXiv:2206.04901 (2022)"},{"key":"11_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/978-3-030-58452-8_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"B Mildenhall","year":"2020","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 405\u2013421. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_24"},{"issue":"1","key":"11_CR24","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"11_CR25","doi-asserted-by":"crossref","unstructured":"Mirzaei, A., et al.: Reference-guided controllable inpainting of neural radiance fields. arXiv preprint arXiv:2304.09677 (2023)","DOI":"10.1109\/ICCV51070.2023.01633"},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"Mirzaei, A., et al.: Spin-nerf: multiview segmentation and perceptual inpainting with neural radiance fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20669\u201320679 (2023)","DOI":"10.1109\/CVPR52729.2023.01980"},{"key":"11_CR27","doi-asserted-by":"crossref","unstructured":"Nie, Y., Han, X., Guo, S., Zheng, Y., Chang, J., Zhang, J.J.: Total3dunderstanding: joint layout, object pose and mesh reconstruction for indoor scenes from a single image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 55\u201364 (2020)","DOI":"10.1109\/CVPR42600.2020.00013"},{"key":"11_CR28","unstructured":"OpenAI: Gpt-4 technical report (2023)"},{"key":"11_CR29","doi-asserted-by":"crossref","unstructured":"Ost, J., Mannan, F., Thuerey, N., Knodt, J., Heide, F.: Neural scene graphs for dynamic scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2856\u20132865 (2021)","DOI":"10.1109\/CVPR46437.2021.00288"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Park, J.J., Florence, P., Straub, J., Newcombe, R., Lovegrove, S.: Deepsdf: learning continuous signed distance functions for shape representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 165\u2013174 (2019)","DOI":"10.1109\/CVPR.2019.00025"},{"key":"11_CR31","first-page":"12013","volume":"34","author":"D Paschalidou","year":"2021","unstructured":"Paschalidou, D., Kar, A., Shugrina, M., Kreis, K., Geiger, A., Fidler, S.: Atiss: autoregressive transformers for indoor scene synthesis. Adv. Neural. Inf. Process. Syst. 34, 12013\u201312026 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"11_CR32","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents (2022)"},{"key":"11_CR33","unstructured":"Rana, K., Haviland, J., Garg, S., Abou-Chakra, J., Reid, I., Suenderhauf, N.: Sayplan: grounding large language models using 3d scene graphs for scalable task planning. arXiv preprint arXiv:2307.06135 (2023)"},{"key":"11_CR34","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"11_CR35","doi-asserted-by":"crossref","unstructured":"Schonberger, J.L., Frahm, J.M.: Structure-from-motion revisited. In: CVPR, pp. 4104\u20134113 (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"11_CR36","unstructured":"Shahbazi, M., et al.: Inserf: text-driven generative object insertion in neural 3d scenes. arXiv preprint arXiv:2401.05335 (2024)"},{"key":"11_CR37","doi-asserted-by":"crossref","unstructured":"Shum, K.C., Kim, J., Hua, B.S., Nguyen, D.T., Yeung, S.K.: Language-driven object fusion into neural radiance fields with pose-conditioned dataset updates (2023)","DOI":"10.1109\/CVPR52733.2024.00495"},{"key":"11_CR38","doi-asserted-by":"crossref","unstructured":"Suvorov, R., et al.: Resolution-robust large mask inpainting with fourier convolutions, pp. 2149\u20132159 (2022)","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"11_CR39","doi-asserted-by":"crossref","unstructured":"Tancik, M., et\u00a0al.: Nerfstudio: a modular framework for neural radiance field development, pp. 1\u201312 (2023)","DOI":"10.1145\/3588432.3591516"},{"key":"11_CR40","unstructured":"Touvron, H., et\u00a0al.: Llama: open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"issue":"4","key":"11_CR41","first-page":"1","volume":"37","author":"K Wang","year":"2018","unstructured":"Wang, K., Savva, M., Chang, A.X., Ritchie, D.: Deep convolutional priors for indoor scene synthesis. ACM Trans. Graph. (TOG) 37(4), 1\u201314 (2018)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"11_CR42","doi-asserted-by":"crossref","unstructured":"Wang, X., Yeshwanth, C., Nie\u00dfner, M.: Sceneformer: indoor scene generation with transformers. In: 2021 International Conference on 3D Vision (3DV), pp. 106\u2013115. IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00021"},{"key":"11_CR43","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wu, W., Xu, D.: Learning unified decompositional and compositional nerf for editable novel view synthesis. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01673"},{"key":"11_CR44","doi-asserted-by":"crossref","unstructured":"Wei, Q.A., et al.: Lego-net: learning regular rearrangements of objects in rooms. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19037\u201319047 (2023)","DOI":"10.1109\/CVPR52729.2023.01825"},{"key":"11_CR45","doi-asserted-by":"crossref","unstructured":"Williams, L.: Casting curved shadows on curved surfaces. SIGGRAPH Comput. Graph. 12(3), 270-274 (1978)","DOI":"10.1145\/965139.807402"},{"key":"11_CR46","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/978-3-031-19812-0_12","volume-title":"ECCV","author":"Q Wu","year":"2022","unstructured":"Wu, Q., et al.: Object-compositional neural implicit surfaces. In: Avidan, S., Brostow, G., Cisse, M., Farinella, G.M., Hassner, T. (eds.) ECCV, pp. 197\u2013213. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-19812-0_12"},{"key":"11_CR47","doi-asserted-by":"crossref","unstructured":"Wu, Z., et al.: Mars: an instance-aware, modular and realistic simulator for autonomous driving. In: CICAI (2023)","DOI":"10.1007\/978-981-99-8850-1_1"},{"key":"11_CR48","doi-asserted-by":"crossref","unstructured":"Yang, B., et al.: Learning object-compositional neural radiance field for editable scene rendering. In: ICCV, pp. 13779\u201313788 (2021)","DOI":"10.1109\/ICCV48922.2021.01352"},{"key":"11_CR49","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: Unisim: a neural closed-loop sensor simulator. In: CVPR, pp. 1389\u20131399 (2023)","DOI":"10.1109\/CVPR52729.2023.00140"},{"key":"11_CR50","unstructured":"Yang, Z., et al.: The dawn of lmms: preliminary explorations with gpt-4v (ision), 9(1), 1 (2023). arXiv preprint arXiv:2309.17421"},{"key":"11_CR51","first-page":"25018","volume":"35","author":"Z Yu","year":"2022","unstructured":"Yu, Z., Peng, S., Niemeyer, M., Sattler, T., Geiger, A.: Monosdf: exploring monocular geometric cues for neural implicit surface reconstruction. NeurIPS 35, 25018\u201325032 (2022)","journal-title":"NeurIPS"},{"issue":"1","key":"11_CR52","doi-asserted-by":"publisher","first-page":"107","DOI":"10.26804\/ager.2020.01.10","volume":"4","author":"W Zha","year":"2020","unstructured":"Zha, W., Li, X., Xing, Y., He, L., Li, D.: Reconstruction of shale image based on wasserstein generative adversarial networks with gradient penalty. Adv. Geo-Energy Res. 4(1), 107\u2013114 (2020)","journal-title":"Adv. Geo-Energy Res."},{"key":"11_CR53","doi-asserted-by":"crossref","unstructured":"Zhi, S., Laidlow, T., Leutenegger, S., Davison, A.J.: In-place scene labelling and understanding with implicit scene representation (2021)","DOI":"10.1109\/ICCV48922.2021.01554"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72761-0_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:29:43Z","timestamp":1727594983000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72761-0_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031727603","9783031727610"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72761-0_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}