{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T09:16:29Z","timestamp":1774602989869,"version":"3.50.1"},"reference-count":208,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:00:00Z","timestamp":1770336000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:00:00Z","timestamp":1770336000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s11263-025-02663-5","type":"journal-article","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:39:30Z","timestamp":1770352770000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Semantically-aware Neural Radiance Fields for Visual Scene Understanding: A Comprehensive 
Review"],"prefix":"10.1007","volume":"134","author":[{"given":"Thang-Anh-Quan","family":"Nguyen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4689-8536","authenticated-orcid":false,"given":"Amine","family":"Bourki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M\u00e1ty\u00e1s","family":"Macudzinski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anthony","family":"Brunel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6603-3257","authenticated-orcid":false,"given":"Mohammed","family":"Bennamoun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,6]]},"reference":[{"key":"2663_CR1","doi-asserted-by":"crossref","unstructured":"Aan\u00e6s, H., Jensen, R.\u00a0R., Vogiatzis, G., Tola, E., & Dahl, A.\u00a0B. (2016). Large-scale data for multiple-view stereopsis. International Journal of Computer Vision, pp. 1\u201316.","DOI":"10.1007\/s11263-016-0902-9"},{"key":"2663_CR2","doi-asserted-by":"publisher","first-page":"15702","DOI":"10.52202\/068431-1142","volume":"35","author":"A Atanov","year":"2022","unstructured":"Atanov, A., Filatov, A., Yeo, T., Sohmshetty, A., & Zamir, A. (2022). Task discovery: Finding the tasks that neural networks generalize on. Advances in Neural Information Processing Systems, 35, 15702\u201315717.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2663_CR3","doi-asserted-by":"crossref","unstructured":"Bao, C., Zhang, Y., Yang, B., Fan, T., Yang, Z., Bao, H., Zhang, G., & Cui, Z. (2023). Sine: Semantic-driven image-based nerf editing with prior-guided editing field. 
In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20919\u201320929.","DOI":"10.1109\/CVPR52729.2023.02004"},{"key":"2663_CR4","doi-asserted-by":"crossref","unstructured":"Barron, J.\u00a0T., Mildenhall, B., Verbin, D., Srinivasan, P.\u00a0P., & Hedman, P. (2022). Mip-nerf 360: Unbounded anti-aliased neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5470\u20135479.","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"2663_CR5","doi-asserted-by":"crossref","unstructured":"Bhalgat, Y., Laina, I., Henriques, J.\u00a0F., Zisserman, A., & Vedaldi, A. (2023). Contrastive lift: 3d object instance segmentation by slow-fast contrastive fusion. arXiv preprint arXiv:2306.04633.","DOI":"10.52202\/075280-0399"},{"key":"2663_CR6","unstructured":"Bing, W., Chen, L., & Yang, B. (2022). Dm-nerf: 3d scene geometry decomposition and manipulation from 2d images."},{"key":"2663_CR7","doi-asserted-by":"crossref","unstructured":"Blomqvist, K., Ott, L., Chung, J.\u00a0J., & Siegwart, R. (2023). Baking in the feature: Accelerating volumetric segmentation by rendering feature maps. In 2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 7629\u20137634. IEEE.","DOI":"10.1109\/IROS55552.2023.10342071"},{"key":"2663_CR8","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., & Efros, A.\u00a0A. (2023). Instructpix2pix: Learning to follow image editing instructions. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402.","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"2663_CR9","doi-asserted-by":"crossref","unstructured":"Caesar, H., Bankiti, V., Lang, A.\u00a0H., Vora, S., Liong, V.\u00a0E., Xu, Q., Krishnan, A., Pan, Y., Baldan, G., & Beijbom, O. (2020). nuscenes: A multimodal dataset for autonomous driving. 
In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 11621\u201311631.","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"2663_CR10","doi-asserted-by":"crossref","unstructured":"Cao, A.-Q., & de\u00a0Charette, R. (2023). Scenerf: Self-supervised monocular 3d scene reconstruction with radiance fields. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9387\u20139398.","DOI":"10.1109\/ICCV51070.2023.00861"},{"key":"2663_CR11","doi-asserted-by":"crossref","unstructured":"Cao, C., & Fu, Y. (2021). Learning a sketch tensor space for image inpainting of man-made scenes. 2021 ieee. In CVF International Conference on Computer Vision, ICCV, pp. 10\u201317.","DOI":"10.1109\/ICCV48922.2021.01424"},{"key":"2663_CR12","doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I., J\u00e9gou, H., Mairal, J., Bojanowski, P., & Joulin, A. (2021). Emerging properties in self-supervised vision transformers. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 9650\u20139660.","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"2663_CR13","doi-asserted-by":"crossref","unstructured":"Cen, J., Zhou, Z., Fang, J., Shen, W., Xie, L., Zhang, X., & Tian, Q. (2023). Segment anything in 3d with nerfs. arXiv preprint arXiv:2304.12308.","DOI":"10.52202\/075280-1130"},{"key":"2663_CR14","doi-asserted-by":"crossref","unstructured":"Chan, E.\u00a0R., Monteiro, M., Kellnhofer, P., Wu, J., & Wetzstein, G. (2021). pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 5799\u20135809.","DOI":"10.1109\/CVPR46437.2021.00574"},{"key":"2663_CR15","doi-asserted-by":"crossref","unstructured":"Chan, E.\u00a0R., Lin, C.\u00a0Z., Chan, M.\u00a0A., Nagano, K., Pan, B., De\u00a0Mello, S., Gallo, O., Guibas, L.\u00a0J., Tremblay, J., Khamis, S. et\u00a0al. (2022). 
Efficient geometry-aware 3d generative adversarial networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16123\u201316133.","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"2663_CR16","doi-asserted-by":"crossref","unstructured":"Chang, A., Dai, A., Funkhouser, T., Halber, M., Niessner, M., Savva, M., Song, S., Zeng, A., & Zhang, Y. (2017). Matterport3d: Learning from rgb-d data in indoor environments. International Conference on 3D Vision (3DV).","DOI":"10.1109\/3DV.2017.00081"},{"key":"2663_CR17","unstructured":"Chang, A.\u00a0X., Funkhouser, T., Guibas, L., Hanrahan, P., Huang, Q., Li, Z., Savarese, S., Savva, M., Song, S., Su, H., et\u00a0al. (2015). Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:1512.03012."},{"key":"2663_CR18","doi-asserted-by":"crossref","unstructured":"Chen, A., Xu, Z., Zhao, F., Zhang, X., Xiang, F., Yu, J., & Su, H. (2021). Mvsnerf: Fast generalizable radiance field reconstruction from multi-view stereo. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14124\u201314133.","DOI":"10.1109\/ICCV48922.2021.01386"},{"key":"2663_CR19","doi-asserted-by":"crossref","unstructured":"Chen, J., Yi, W., Ma, L., Jia, X., & Lu, H. (2023a). Gm-nerf: Learning generalizable model-based neural radiance fields from multi-view images. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20648\u201320658.","DOI":"10.1109\/CVPR52729.2023.01978"},{"key":"2663_CR20","doi-asserted-by":"crossref","unstructured":"Chen, L., Song, Y., Bao, H., & Zhou, X. (2023b). Perceiving unseen 3d objects by poking the objects. In ICRA.","DOI":"10.1109\/ICRA48891.2023.10160338"},{"key":"2663_CR21","doi-asserted-by":"crossref","unstructured":"Chen, S., Zhang, K., Shi, Y., Wang, H., Zhu, Y., Song, G., An, S., Kristjansson, J., Yang, X., & Zwicker, M. (2023c). Panic-3d: Stylized single-view 3d reconstruction from portraits of anime characters. 
In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21068\u201321077.","DOI":"10.1109\/CVPR52729.2023.02018"},{"key":"2663_CR22","doi-asserted-by":"crossref","unstructured":"Chen, X., Huang, J., Bin, Y., Yu, L., & Liao, Y. (2023d). Veri3d: Generative vertex-based radiance fields for 3d controllable human image synthesis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8986\u20138997.","DOI":"10.1109\/ICCV51070.2023.00825"},{"key":"2663_CR23","doi-asserted-by":"crossref","unstructured":"Cheng, X., Wu, Y., Jia, M., Wang, Q., & Zhang, J. (2023). Panoptic compositional feature field for editable scene rendering with network-inferred labels via metric learning. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4947\u20134957.","DOI":"10.1109\/CVPR52729.2023.00479"},{"key":"2663_CR24","doi-asserted-by":"crossref","unstructured":"Chibane, J., Bansal, A., Lazova, V., & Pons-Moll, G. (2021). Stereo radiance fields (srf): Learning view synthesis for sparse views of novel scenes. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7911\u20137920.","DOI":"10.1109\/CVPR46437.2021.00782"},{"key":"2663_CR25","doi-asserted-by":"crossref","unstructured":"Cohen-Bar, D., Richardson, E., Metzer, G., Giryes, R., & Cohen-Or, D. (2023). Set-the-scene: Global-local training for generating controllable nerf scenes. arXiv preprint arXiv:2303.13450.","DOI":"10.1109\/ICCVW60793.2023.00314"},{"key":"2663_CR26","unstructured":"Coughlan, J., & Yuille, A.\u00a0L. (2000). The manhattan world assumption: Regularities in scene statistics which enable bayesian inference. Advances in Neural Information Processing Systems, 13."},{"key":"2663_CR27","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.\u00a0X., Savva, M., Halber, M., Funkhouser, T., & Nie\u00dfner, M. (2017). Scannet: Richly-annotated 3d reconstructions of indoor scenes. 
In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5828\u20135839.","DOI":"10.1109\/CVPR.2017.261"},{"key":"2663_CR28","doi-asserted-by":"crossref","unstructured":"Deng, C., Jiang, C., Qi, C.\u00a0R., Yan, X., Zhou, Y., Guibas, L., Anguelov, D. et\u00a0al. (2023). Nerdi: Single-view nerf synthesis with language-guided diffusion as general image priors. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20637\u201320647.","DOI":"10.1109\/CVPR52729.2023.01977"},{"key":"2663_CR29","doi-asserted-by":"crossref","unstructured":"Deng, Y., Yang, J., Xiang, J., & Tong, X. (2022). Gram: Generative radiance manifolds for 3d-aware image generation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10673\u201310683.","DOI":"10.1109\/CVPR52688.2022.01041"},{"key":"2663_CR30","unstructured":"Fan, Z., Wang, P., Jiang, Y., Gong, X., Xu, D., & Wang, Z. (2022). Nerf-sos: Any-view self-supervised object segmentation on complex scenes. arXiv preprint arXiv:2209.08776."},{"key":"2663_CR31","first-page":"27503","volume":"34","author":"C Fifty","year":"2021","unstructured":"Fifty, C., Amid, E., Zhao, Z., Yu, T., Anil, R., & Finn, C. (2021). Efficiently identifying task groupings for multi-task learning. Advances in Neural Information Processing Systems, 34, 27503\u201327516.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2663_CR32","doi-asserted-by":"crossref","unstructured":"Fridovich-Keil, S., Meanti, G., Warburg, F.\u00a0R., Recht, B., & Kanazawa, A. (2023). K-planes: Explicit radiance fields in space, time, and appearance. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12479\u201312488.","DOI":"10.1109\/CVPR52729.2023.01201"},{"key":"2663_CR33","doi-asserted-by":"crossref","unstructured":"Fu, H., Cai, B., Gao, L., Zhang, L.-X., Wang, J., Li, C., Zeng, Q., Sun, C., Jia, R., Zhao, B. et\u00a0al. 
(2021). 3d-front: 3d furnished rooms with layouts and semantics. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10933\u201310942.","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"2663_CR34","doi-asserted-by":"crossref","unstructured":"Fu, X., Zhang, S., Chen, T., Lu, Y., Zhu, L., Zhou, X., Geiger, A., & Liao, Y. (2022). Panoptic nerf: 3d-to-2d label transfer for panoptic urban scene segmentation. In International Conference on 3D Vision (3DV).","DOI":"10.1109\/3DV57658.2022.00042"},{"key":"2663_CR35","unstructured":"Fu, X., Zhang, S., Chen, T., Lu, Y., Zhou, X., Geiger, A., & Liao, Y. (2023). Panopticnerf-360: Panoramic 3d-to-2d label transfer in urban scenes. arXiv preprint arXiv:2309.10815."},{"key":"2663_CR36","doi-asserted-by":"crossref","unstructured":"Gafni, G., Thies, J., Zollhofer, M., & Nie\u00dfner, M. (2021). Dynamic neural radiance fields for monocular 4d facial avatar reconstruction. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8649\u20138658.","DOI":"10.1109\/CVPR46437.2021.00854"},{"key":"2663_CR37","doi-asserted-by":"crossref","unstructured":"Gaidon, A., Wang, Q., Cabon, Y., & Vig, E. (2016). Virtual worlds as proxy for multi-object tracking analysis. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4340\u20134349.","DOI":"10.1109\/CVPR.2016.470"},{"key":"2663_CR38","doi-asserted-by":"crossref","unstructured":"Gao, C., Saraf, A., Kopf, J., & Huang, J.-B. (2021). Dynamic view synthesis from dynamic monocular video. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5712\u20135721.","DOI":"10.1109\/ICCV48922.2021.00566"},{"key":"2663_CR39","unstructured":"Gao, K., Gao, Y., He, H., Lu, D., Xu, L., & Li, J. (2022). Nerf: Neural radiance field in 3d vision, a comprehensive review. 
arXiv preprint arXiv:2210.00379."},{"key":"2663_CR40","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., & Urtasun, R. (2012). Are we ready for autonomous driving? the kitti vision benchmark suite. In 2012 IEEE conference on computer vision and pattern recognition, pp. 3354\u20133361. IEEE.","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"11","key":"2663_CR41","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger, A., Lenz, P., Stiller, C., & Urtasun, R. (2013). Vision meets robotics: The kitti dataset. The International Journal of Robotics Research, 32(11), 1231\u20131237.","journal-title":"The International Journal of Robotics Research"},{"key":"2663_CR42","doi-asserted-by":"crossref","unstructured":"Goel, R., Sirikonda, D., Saini, S., & Narayanan, P. (2023). Interactive segmentation of radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4201\u20134211.","DOI":"10.1109\/CVPR52729.2023.00409"},{"key":"2663_CR43","unstructured":"Gu, J., Liu, L., Wang, P., & Theobalt, C. (2021). Stylenerf: A style-based 3d-aware generator for high-resolution image synthesis. arXiv preprint arXiv:2110.08985."},{"key":"2663_CR44","doi-asserted-by":"crossref","unstructured":"Guo, H., Peng, S., Lin, H., Wang, Q., Zhang, G., Bao, H., & Zhou, X. (2022). Neural 3d scene reconstruction with the manhattan-world assumption. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5511\u20135520.","DOI":"10.1109\/CVPR52688.2022.00543"},{"key":"2663_CR45","doi-asserted-by":"crossref","unstructured":"Haque, A., Tancik, M., Efros, A.\u00a0A., Holynski, A., & Kanazawa, A. (2023). Instruct-nerf2nerf: Editing 3d scenes with instructions. 
In Proceedings of the IEEE\/CVF International Conference on Computer Vision.","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"2663_CR46","unstructured":"Haughton, I., Sucar, E., Mouton, A., Johns, E., & Davison, A. (2023). Real-time mapping of physical scene properties with an autonomous robot experimenter. In Conference on Robot Learning, pp. 118\u2013127. PMLR."},{"key":"2663_CR47","doi-asserted-by":"crossref","unstructured":"Hayler, A., Wimbauer, F., Muhle, D., Rupprecht, C., & Cremers, D. (2023). S4c: Self-supervised semantic scene completion with neural fields. arXiv preprint arXiv:2310.07522.","DOI":"10.1109\/3DV62453.2024.00133"},{"key":"2663_CR48","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask r-cnn. In Proceedings of the IEEE international conference on computer vision, pp. 2961\u20132969.","DOI":"10.1109\/ICCV.2017.322"},{"key":"2663_CR49","doi-asserted-by":"crossref","unstructured":"Hong, Y., Lin, C., Du, Y., Chen, Z., Tenenbaum, J.\u00a0B., & Gan, C. (2023). 3d concept learning and reasoning from multi-view images. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9202\u20139212.","DOI":"10.1109\/CVPR52729.2023.00888"},{"key":"2663_CR50","doi-asserted-by":"crossref","unstructured":"Hu, B., Huang, J., Liu, Y., Tai, Y.-W., & Tang, C.-K. (2023a). Nerf-rpn: A general framework for object detection in nerfs. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23528\u201323538.","DOI":"10.1109\/CVPR52729.2023.02253"},{"key":"2663_CR51","doi-asserted-by":"crossref","unstructured":"Hu, S., Hong, F., Pan, L., Mei, H., Yang, L., & Liu, Z. (2023b). Sherf: Generalizable human nerf from a single image. 
In Proceedings of the IEEE\/CVF International Conference on Computer Vision.","DOI":"10.1109\/ICCV51070.2023.00858"},{"key":"2663_CR52","doi-asserted-by":"crossref","unstructured":"Huang, L., Hodan, T., Ma, L., Zhang, L., Tran, L., Twigg, C., Wu, P.-C., Yuan, J., Keskin, C., & Wang, R. (2022). Neural correspondence field for object pose estimation. In European Conference on Computer Vision, pp. 585\u2013603. Springer.","DOI":"10.1007\/978-3-031-20080-9_34"},{"key":"2663_CR53","doi-asserted-by":"publisher","first-page":"953","DOI":"10.1609\/aaai.v37i1.25175","volume":"37","author":"X Huang","year":"2023","unstructured":"Huang, X., Zhang, Y., Ni, B., Li, T., Chen, K., & Zhang, W. (2023). Boosting point clouds rendering via radiance mapping. In Proceedings of the AAAI conference on artificial intelligence, 37, 953\u2013961.","journal-title":"In Proceedings of the AAAI conference on artificial intelligence"},{"key":"2663_CR54","doi-asserted-by":"crossref","unstructured":"Hwang, S., Hyung, J., Kim, D., Kim, M.-J., & Choo, J. (2023). Faceclipnerf: Text-driven 3d face manipulation using deformable neural radiance fields. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3469\u20133479.","DOI":"10.1109\/ICCV51070.2023.00321"},{"key":"2663_CR55","doi-asserted-by":"crossref","unstructured":"Irshad, M.\u00a0Z., Zakharov, S., Ambrus, R., Kollar, T., Kira, Z., & Gaidon, A. (2022). Shapo: Implicit representations for multi-object shape, appearance, and pose optimization. In European Conference on Computer Vision, pp. 275\u2013292. Springer.","DOI":"10.1007\/978-3-031-20086-1_16"},{"key":"2663_CR56","doi-asserted-by":"crossref","unstructured":"Irshad, M.\u00a0Z., Zakharov, S., Liu, K., Guizilini, V., Kollar, T., Gaidon, A., Kira, Z., Ambrus, R. (2023). Neo 360: Neural fields for sparse view synthesis of outdoor scenes. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
9187\u20139198.","DOI":"10.1109\/ICCV51070.2023.00843"},{"key":"2663_CR57","unstructured":"Jaegle, A., Gimeno, F., Brock, A., Vinyals, O., Zisserman, A., & Carreira, J. (2021). Perceiver: General perception with iterative attention. In International conference on machine learning, pp. 4651\u20134664. PMLR."},{"key":"2663_CR58","doi-asserted-by":"crossref","unstructured":"Jain, A., Tancik, M., & Abbeel, P. (2021). Putting nerf on a diet: Semantically consistent few-shot view synthesis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5885\u20135894.","DOI":"10.1109\/ICCV48922.2021.00583"},{"key":"2663_CR59","doi-asserted-by":"crossref","unstructured":"Jain, A., Mildenhall, B., Barron, J.\u00a0T., Abbeel, P., & Poole, B. (2022). Zero-shot text-guided object generation with dream fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 867\u2013876.","DOI":"10.1109\/CVPR52688.2022.00094"},{"key":"2663_CR60","doi-asserted-by":"crossref","unstructured":"Jang, W., & Agapito, L. (2021). Codenerf: Disentangled neural radiance fields for object categories. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12949\u201312958.","DOI":"10.1109\/ICCV48922.2021.01271"},{"key":"2663_CR61","doi-asserted-by":"crossref","unstructured":"Jayasundara, V., Agrawal, A., Heron, N., Shrivastava, A., & Davis, L.\u00a0S. (2023). Flexnerf: Photorealistic free-viewpoint rendering of moving humans from sparse views. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR52729.2023.02023"},{"key":"2663_CR62","doi-asserted-by":"crossref","unstructured":"Jensen, R., Dahl, A., Vogiatzis, G., Tola, E., & Aan\u00e6s, H. (2014). Large scale multi-view stereopsis evaluation. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 
406\u2013413.","DOI":"10.1109\/CVPR.2014.59"},{"key":"2663_CR63","doi-asserted-by":"crossref","unstructured":"Jo, K., Shim, G., Jung, S., Yang, S., & Choo, J. (2023). Cg-nerf: Conditional generative neural radiance fields for 3d-aware image synthesis. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 724\u2013733.","DOI":"10.1109\/WACV56688.2023.00079"},{"key":"2663_CR64","doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., Van Der\u00a0Maaten, L., Fei-Fei, L., Lawrence\u00a0Zitnick, C., & Girshick, R. (2017). Clevr: A diagnostic dataset for compositional language and elementary visual reasoning. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2901\u20132910.","DOI":"10.1109\/CVPR.2017.215"},{"issue":"3","key":"2663_CR65","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1145\/964965.808594","volume":"18","author":"JT Kajiya","year":"1984","unstructured":"Kajiya, J. T., & Von Herzen, B. P. (1984). Ray tracing volume densities. ACM SIGGRAPH computer graphics, 18(3), 165\u2013174.","journal-title":"ACM SIGGRAPH computer graphics"},{"key":"2663_CR66","doi-asserted-by":"crossref","unstructured":"Kania, K., Yi, K.\u00a0M., Kowalski, M., Trzci\u0144ski, T., & Tagliasacchi, A. (2022). Conerf: Controllable neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18623\u201318632.","DOI":"10.1109\/CVPR52688.2022.01807"},{"key":"2663_CR67","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., & Aila, T. (2020). Analyzing and improving the image quality of stylegan. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 8110\u20138119.","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"2663_CR68","doi-asserted-by":"crossref","unstructured":"Kerr, J., Kim, C.\u00a0M., Goldberg, K., Kanazawa, A., & Tancik, M. (2023). 
Lerf: Language embedded radiance fields. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19729\u201319739.","DOI":"10.1109\/ICCV51070.2023.01807"},{"key":"2663_CR69","unstructured":"Kim, M., Ko, J., Cho, K., Choi, J., Choi, D., & Kim, S. (2022). Ae-nerf: Auto-encoding neural radiance fields for 3d-aware object manipulation. arXiv preprint arXiv:2204.13426."},{"key":"2663_CR70","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., Ravi, N., Mao, H., Rolland, C., Gustafson, L., Xiao, T., Whitehead, S., Berg, A.\u00a0C., Lo, W.-Y., et\u00a0al. (2023). Segment anything. arXiv preprint arXiv:2304.02643.","DOI":"10.1109\/ICCV51070.2023.00371"},{"issue":"4","key":"2663_CR71","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073599","volume":"36","author":"A Knapitsch","year":"2017","unstructured":"Knapitsch, A., Park, J., Zhou, Q.-Y., & Koltun, V. (2017). Tanks and temples: Benchmarking large-scale scene reconstruction. ACM Transactions on Graphics (ToG), 36(4), 1\u201313.","journal-title":"ACM Transactions on Graphics (ToG)"},{"key":"2663_CR72","doi-asserted-by":"crossref","unstructured":"Kobayashi, S., Matsumoto, E., & Sitzmann, V. (2022). Decomposing nerf for editing via feature field distillation. In Advances in Neural Information Processing Systems.","DOI":"10.52202\/068431-1694"},{"key":"2663_CR73","doi-asserted-by":"crossref","unstructured":"Kong, X., Liu, S., Taher, M., & Davison, A.\u00a0J. (2023). vmap: Vectorised object mapping for neural field slam. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 952\u2013961.","DOI":"10.1109\/CVPR52729.2023.00098"},{"key":"2663_CR74","unstructured":"Krishnan, A., Raj, A., Zhang, X., Carlson, A., Tseng, N., Sridhar, S., Jaipuria, N., & Hays, J. (2023). Lane: Lighting-aware neural fields for compositional scene synthesis. 
arXiv preprint arXiv:2304.03280."},{"key":"2663_CR75","doi-asserted-by":"crossref","unstructured":"Kundu, A., Genova, K., Yin, X., Fathi, A., Pantofaru, C., Guibas, L.\u00a0J., Tagliasacchi, A., Dellaert, F., & Funkhouser, T. (2022). Panoptic neural fields: A semantic object-aware neural scene representation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12871\u201312881.","DOI":"10.1109\/CVPR52688.2022.01253"},{"key":"2663_CR76","doi-asserted-by":"crossref","unstructured":"Lazova, V., Guzov, V., Olszewski, K., Tulyakov, S., & Pons-Moll, G. (2023). Control-nerf: Editable feature volumes for scene rendering and manipulation. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 4340\u20134350.","DOI":"10.1109\/WACV56688.2023.00432"},{"key":"2663_CR77","unstructured":"Lee, H.-H., & Chang, A.\u00a0X. (2022). Understanding pure clip guidance for voxel grid nerf models. arXiv preprint arXiv:2209.15172."},{"key":"2663_CR78","unstructured":"Li, B., Weinberger, K. Q., Belongie, S., Koltun, V., & Ranftl, R. (2022). Language-driven semantic segmentation."},{"key":"2663_CR79","unstructured":"Li, C., Zhang, C., Waghwase, A., Lee, L.-H., Rameau, F., Yang, Y., Bae, S.-H., & Hong, C.\u00a0S. (2023). Generative ai meets 3d: A survey on text-to-3d in aigc era. arXiv preprint arXiv:2305.06131."},{"key":"2663_CR80","doi-asserted-by":"crossref","unstructured":"Li, J., Feng, Z., She, Q., Ding, H., Wang, C., & Lee, G.\u00a0H. (2021a). Mine: Towards continuous depth mpi with nerf for novel view synthesis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12578\u201312588.","DOI":"10.1109\/ICCV48922.2021.01235"},{"key":"2663_CR81","doi-asserted-by":"crossref","unstructured":"Li, L.\u00a0H., Zhang, P., Zhang, H., Yang, J., Li, C., Zhong, Y., Wang, L., Yuan, L., Zhang, L., Hwang, J.-N., et\u00a0al. (2022b). Grounded language-image pre-training. 
In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10965\u201310975.","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"2663_CR82","doi-asserted-by":"crossref","unstructured":"Li, T., Slavcheva, M., Zollhoefer, M., Green, S., Lassner, C., Kim, C., Schmidt, T., Lovegrove, S., Goesele, M., Newcombe, R. et\u00a0al. (2022c). Neural 3d video synthesis from multi-view video. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5521\u20135531.","DOI":"10.1109\/CVPR52688.2022.00544"},{"key":"2663_CR83","doi-asserted-by":"crossref","unstructured":"Li, Z., Niklaus, S., Snavely, N., & Wang, O. (2021b). Neural scene flow fields for space-time view synthesis of dynamic scenes. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6498\u20136508.","DOI":"10.1109\/CVPR46437.2021.00643"},{"key":"2663_CR84","doi-asserted-by":"crossref","unstructured":"Liang, Y., Laidlaw, E.E., Meyerowitz, A., Sridhar, S., & Tompkin, J. (2023). Semantic attention flow fields for monocular dynamic scene decomposition. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 21797\u201321806.","DOI":"10.1109\/ICCV51070.2023.01992"},{"issue":"3","key":"2663_CR85","doi-asserted-by":"publisher","first-page":"3292","DOI":"10.1109\/TPAMI.2022.3179507","volume":"45","author":"Y Liao","year":"2022","unstructured":"Liao, Y., Xie, J., & Geiger, A. (2022). Kitti-360: A novel dataset and benchmarks for urban scene understanding in 2d and 3d. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(3), 3292\u20133310.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2663_CR86","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., & Belongie, S. (2017). Feature pyramid networks for object detection. 
In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2117\u20132125.","DOI":"10.1109\/CVPR.2017.106"},{"key":"2663_CR87","unstructured":"Lin, Y., Bai, H., Li, S., Lu, H., Lin, X., Xiong, H., & Wang, L. (2023). Componerf: Text-guided multi-object compositional nerf with editable 3d scene layout. arXiv preprint arXiv:2303.13843."},{"key":"2663_CR88","doi-asserted-by":"crossref","unstructured":"Liu, F., Zhang, C., Zheng, Y., & Duan, Y. (2023a). Semantic ray: Learning a generalizable semantic field with cross-reprojection attention. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17386\u201317396.","DOI":"10.1109\/CVPR52729.2023.01668"},{"key":"2663_CR89","unstructured":"Liu, H.-K., Shen, I., Chen, B.-Y. et\u00a0al. (2022a). Nerf-in: Free-form nerf inpainting with rgb-d priors. arXiv preprint arXiv:2206.04901."},{"key":"2663_CR90","unstructured":"Liu, K., Zhan, F., Zhang, J., Xu, M., Yu, Y., El\u00a0Saddik, A., Theobalt, C., Xing, E., & Lu, S. (2023b). Weakly supervised 3d open-vocabulary segmentation. In Thirty-seventh Conference on Neural Information Processing Systems."},{"issue":"6","key":"2663_CR91","first-page":"1","volume":"40","author":"L Liu","year":"2021","unstructured":"Liu, L., Habermann, M., Rudnev, V., Sarkar, K., Gu, J., & Theobalt, C. (2021). Neural actor: Neural free-view synthesis of human actors with pose control. ACM transactions on graphics (TOG), 40(6), 1\u201316.","journal-title":"ACM transactions on graphics (TOG)"},{"key":"2663_CR92","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., & Vondrick, C. (2023c). Zero-1-to-3: Zero-shot one image to 3d object. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
9298\u20139309.","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"2663_CR93","doi-asserted-by":"crossref","unstructured":"Liu, S., Zhang, X., Zhang, Z., Zhang, R., Zhu, J.-Y., & Russell, B. (2021b). Editing conditional radiance fields. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 5773\u20135783.","DOI":"10.1109\/ICCV48922.2021.00572"},{"key":"2663_CR94","doi-asserted-by":"crossref","unstructured":"Liu, T., Zhao, H., Yu, Y., Zhou, G., & Liu, M. (2023d). Car-studio: Learning car radiance fields from single-view and endless in-the-wild images. arXiv preprint arXiv:2307.14009.","DOI":"10.1109\/LRA.2024.3349949"},{"key":"2663_CR95","first-page":"17730","volume":"35","author":"X Liu","year":"2022","unstructured":"Liu, X., Chen, J., Yu, H., Tai, Y.-W., & Tang, C.-K. (2022). Unsupervised multi-view object segmentation using radiance field propagation. Advances in Neural Information Processing Systems, 35, 17730\u201317743.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2663_CR96","doi-asserted-by":"crossref","unstructured":"Liu, Y., Hu, B., Huang, J., Tai, Y.-W., & Tang, C.-K. (2023e). Instance neural radiance field. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 787\u2013796.","DOI":"10.1109\/ICCV51070.2023.00079"},{"key":"2663_CR97","doi-asserted-by":"crossref","unstructured":"Liu, Z., Milano, F., Frey, J., Siegwart, R., Blum, H., & Cadena, C. (2023f). Unsupervised continual semantic adaptation through neural rendering. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3031\u20133040.","DOI":"10.1109\/CVPR52729.2023.00296"},{"key":"2663_CR98","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., & Black, M.\u00a0J. (2023). Smpl: A skinned multi-person linear model. 
Seminal Graphics Papers: Pushing the Boundaries, Volume 2.","DOI":"10.1145\/3596711.3596800"},{"key":"2663_CR99","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.patrec.2016.01.019","volume":"72","author":"J Malik","year":"2016","unstructured":"Malik, J., Arbel\u00e1ez, P., Carreira, J., Fragkiadaki, K., Girshick, R., Gkioxari, G., Gupta, S., Hariharan, B., Kar, A., & Tulsiani, S. (2016). The three r\u2019s of computer vision: Recognition, reconstruction and reorganization. Pattern Recognition Letters, 72, 4\u201314.","journal-title":"Pattern Recognition Letters"},{"key":"2663_CR100","doi-asserted-by":"crossref","unstructured":"Martin-Brualla, R., Radwan, N., Sajjadi, M.\u00a0S., Barron, J.\u00a0T., Dosovitskiy, A., & Duckworth, D. (2021). Nerf in the wild: Neural radiance fields for unconstrained photo collections. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7210\u20137219.","DOI":"10.1109\/CVPR46437.2021.00713"},{"key":"2663_CR101","doi-asserted-by":"crossref","unstructured":"Mazur, K., Sucar, E., & Davison, A.\u00a0J. (2023). Feature-realistic neural fusion for real-time, open set scene understanding. In 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 8201\u20138207. IEEE.","DOI":"10.1109\/ICRA48891.2023.10160800"},{"key":"2663_CR102","doi-asserted-by":"crossref","unstructured":"Melas-Kyriazi, L., Laina, I., Rupprecht, C., & Vedaldi, A. (2023). Realfusion: 360deg reconstruction of any object from a single image. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8446\u20138455.","DOI":"10.1109\/CVPR52729.2023.00816"},{"key":"2663_CR103","doi-asserted-by":"crossref","unstructured":"Metzer, G., Richardson, E., Patashnik, O., Giryes, R., & Cohen-Or, D. (2023). Latent-nerf for shape-guided generation of 3d shapes and textures. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
12663\u201312673.","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"2663_CR104","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.\u00a0P., Ortiz-Cayon, R., Kalantari, N.\u00a0K., Ramamoorthi, R., Ng, R., & Kar, A. (2019). Local light field fusion: Practical view synthesis with prescriptive sampling guidelines. ACM Transactions on Graphics (TOG).","DOI":"10.1145\/3306346.3322980"},{"key":"2663_CR105","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P.\u00a0P., Tancik, M., Barron, J.\u00a0T., Ramamoorthi, R., & Ng, R. (2020). Nerf: Representing scenes as neural radiance fields for view synthesis. In European Conference on Computer Vision, pp. 405\u2013421. Springer.","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"2663_CR106","doi-asserted-by":"crossref","unstructured":"Min, Z., Zhuang, B., Schulter, S., Liu, B., Dunn, E., & Chandraker, M. (2023). Neurocs: Neural nocs supervision for monocular 3d object localization. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21404\u201321414.","DOI":"10.1109\/CVPR52729.2023.02050"},{"key":"2663_CR107","doi-asserted-by":"crossref","unstructured":"Mirzaei, A., Kant, Y., Kelly, J., & Gilitschenski, I. (2022). Laterf: Label and text driven object radiance fields. In European Conference on Computer Vision, pp. 20\u201336. Springer.","DOI":"10.1007\/978-3-031-20062-5_2"},{"key":"2663_CR108","doi-asserted-by":"crossref","unstructured":"Mirzaei, A., Aumentado-Armstrong, T., Brubaker, M.\u00a0A., Kelly, J., Levinshtein, A., Derpanis, K.\u00a0G., & Gilitschenski, I. (2023a). Reference-guided controllable inpainting of neural radiance fields. arXiv preprint arXiv:2304.09677.","DOI":"10.1109\/ICCV51070.2023.01633"},{"key":"2663_CR109","doi-asserted-by":"crossref","unstructured":"Mirzaei, A., Aumentado-Armstrong, T., Derpanis, K.\u00a0G., Kelly, J., Brubaker, M.\u00a0A., Gilitschenski, I., & Levinshtein, A. (2023b). 
Spin-nerf: Multiview segmentation and perceptual inpainting with neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20669\u201320679.","DOI":"10.1109\/CVPR52729.2023.01980"},{"key":"2663_CR110","unstructured":"Mittal, A. (2023). Neural radiance fields: Past, present, and future. arXiv preprint arXiv:2304.10050."},{"key":"2663_CR111","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602."},{"key":"2663_CR112","doi-asserted-by":"crossref","unstructured":"Moulon, P., Monasse, P., Perrot, R., & Marlet, R. (2017). Openmvg: Open multiple view geometry. In Reproducible Research in Pattern Recognition: First International Workshop, RRPR 2016, Canc\u00fan, Mexico, December 4, 2016, Revised Selected Papers 1, pp. 60\u201374. Springer.","DOI":"10.1007\/978-3-319-56414-2_5"},{"key":"2663_CR113","doi-asserted-by":"crossref","unstructured":"Mu, J., Sang, S., Vasconcelos, N., & Wang, X. (2023). Actorsnerf: Animatable few-shot human rendering with generalizable nerfs. In Proceedings of the IEEE\/CVF International Conference on Computer Vision.","DOI":"10.1109\/ICCV51070.2023.01686"},{"key":"2663_CR114","doi-asserted-by":"crossref","unstructured":"M\u00fcller, N., Simonelli, A., Porzi, L., Bul\u00f2, S.\u00a0R., Nie\u00dfner, M., & Kontschieder, P. (2022a). Autorf: Learning 3d object radiance fields from single view observations. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3971\u20133980.","DOI":"10.1109\/CVPR52688.2022.00394"},{"issue":"4","key":"2663_CR115","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., & Keller, A. (2022). 
Instant neural graphics primitives with a multiresolution hash encoding. ACM Transactions on Graphics (ToG), 41(4), 1\u201315.","journal-title":"ACM Transactions on Graphics (ToG)"},{"key":"2663_CR116","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., & Geiger, A. (2021). Giraffe: Representing scenes as compositional generative neural feature fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11453\u201311464.","DOI":"10.1109\/CVPR46437.2021.01129"},{"key":"2663_CR117","doi-asserted-by":"crossref","unstructured":"Ost, J., Mannan, F., Thuerey, N., Knodt, J., & Heide, F. (2021). Neural scene graphs for dynamic scenes. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2856\u20132865.","DOI":"10.1109\/CVPR46437.2021.00288"},{"key":"2663_CR118","doi-asserted-by":"crossref","unstructured":"Park, K., Sinha, U., Hedman, P., Barron, J.\u00a0T., Bouaziz, S., Goldman, D.\u00a0B., Martin-Brualla, R., & Seitz, S.\u00a0M. (2021). Hypernerf: A higher-dimensional representation for topologically varying neural radiance fields. arXiv preprint arXiv:2106.13228.","DOI":"10.1145\/3478513.3480487"},{"key":"2663_CR119","doi-asserted-by":"crossref","unstructured":"Park, S., Son, M., Jang, S., Ahn, Y.\u00a0C., Kim, J.-Y., & Kang, N. (2023). Temporal interpolation is all you need for dynamic neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4212\u20134221.","DOI":"10.1109\/CVPR52729.2023.00410"},{"key":"2663_CR120","unstructured":"Poole, B., Jain, A., Barron, J.\u00a0T., & Mildenhall, B. (2022). Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988."},{"key":"2663_CR121","unstructured":"Rabby, A., & Zhang, C. (2023). Beyondpixels: A comprehensive review of the evolution of neural radiance fields. 
arXiv preprint arXiv:2306.03000."},{"key":"2663_CR122","unstructured":"Radford, A., Kim, J.\u00a0W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et\u00a0al. (2021). Learning transferable visual models from natural language supervision. In International conference on machine learning, pp. 8748\u20138763. PMLR."},{"key":"2663_CR123","doi-asserted-by":"crossref","unstructured":"Rebain, D., Matthews, M., Yi, K.\u00a0M., Lagun, D., & Tagliasacchi, A. (2022). Lolnerf: Learn from one look. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1558\u20131567.","DOI":"10.1109\/CVPR52688.2022.00161"},{"key":"2663_CR124","doi-asserted-by":"crossref","unstructured":"Reimers, N., & Gurevych, I. (2019). Sentence-bert: Sentence embeddings using siamese bert-networks. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics.","DOI":"10.18653\/v1\/D19-1410"},{"key":"2663_CR125","doi-asserted-by":"crossref","unstructured":"Reizenstein, J., Shapovalov, R., Henzler, P., Sbordone, L., Labatut, P., & Novotny, D. (2021). Common objects in 3d: Large-scale learning and evaluation of real-life 3d category reconstruction. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10901\u201310911.","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"2663_CR126","unstructured":"Rematas, K., Martin-Brualla, R., & Ferrari, V. (2021). Sharf: Shape-conditioned radiance fields from a single view. In International Conference on Machine Learning."},{"key":"2663_CR127","doi-asserted-by":"crossref","unstructured":"Ren, Z., Agarwala, A., Russell, B., Schwing, A.\u00a0G., & Wang, O. (2022). Neural volumetric object selection. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
6133\u20136142.","DOI":"10.1109\/CVPR52688.2022.00604"},{"key":"2663_CR128","doi-asserted-by":"crossref","unstructured":"Roberts, M., Ramapuram, J., Ranjan, A., Kumar, A., Bautista, M.\u00a0A., Paczan, N., Webb, R., & Susskind, J.\u00a0M. (2021). Hypersim: A photorealistic synthetic dataset for holistic indoor scene understanding. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10912\u201310922.","DOI":"10.1109\/ICCV48922.2021.01073"},{"key":"2663_CR129","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., & Ommer, B. (2022). High-resolution image synthesis with latent diffusion models. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2663_CR130","doi-asserted-by":"crossref","unstructured":"Rosu, R.\u00a0A., & Behnke, S. (2023). Permutosdf: Fast multi-view reconstruction with implicit surfaces using permutohedral lattices. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8466\u20138475.","DOI":"10.1109\/CVPR52729.2023.00818"},{"key":"2663_CR131","doi-asserted-by":"crossref","unstructured":"Schonberger, J.\u00a0L., & Frahm, J.-M. (2016). Structure-from-motion revisited. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4104\u20134113.","DOI":"10.1109\/CVPR.2016.445"},{"key":"2663_CR132","first-page":"20154","volume":"33","author":"K Schwarz","year":"2020","unstructured":"Schwarz, K., Liao, Y., Niemeyer, M., & Geiger, A. (2020). Graf: Generative radiance fields for 3d-aware image synthesis. Advances in Neural Information Processing Systems, 33, 20154\u201320166.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2663_CR133","unstructured":"Seo, H., Kim, H., Kim, G., & Chun, S.\u00a0Y. (2023). Ditto-nerf: Diffusion-based iterative text to omni-directional 3d model. 
arXiv preprint arXiv:2304.02827."},{"key":"2663_CR134","unstructured":"Shafiullah, N.\u00a0M.\u00a0M., Paxton, C., Pinto, L., Chintala, S., & Szlam, A. (2023). Clip-fields: Weakly supervised semantic fields for robotic memory. In ICRA2023 Workshop on Pretraining for Robotics (PT4R)."},{"key":"2663_CR135","unstructured":"Sharma, P., Tewari, A., Du, Y., Zakharov, S., Ambrus, R.\u00a0A., Gaidon, A., Freeman, W.\u00a0T., Durand, F., Tenenbaum, J.\u00a0B., & Sitzmann, V. (2022). Neural groundplans: Persistent neural scene representations from a single image. In The Eleventh International Conference on Learning Representations."},{"key":"2663_CR136","unstructured":"Shen, W., Yang, G., Yu, A., Wong, J., Kaelbling, L.\u00a0P., & Isola, P. (2023). Distilled feature fields enable few-shot language-guided manipulation. In Conference on Robot Learning, pp. 405\u2013424. PMLR."},{"key":"2663_CR137","unstructured":"Shim, D., Lee, S., & Kim, H.\u00a0J. (2023). Snerl: Semantic-aware neural radiance fields for reinforcement learning. In International Conference on Machine Learning. PMLR."},{"key":"2663_CR138","doi-asserted-by":"crossref","unstructured":"Siddiqui, Y., Porzi, L., Bul\u00f2, S.\u00a0R., M\u00fcller, N., Nie\u00dfner, M., Dai, A., & Kontschieder, P. (2023). Panoptic lifting for 3d scene understanding with neural fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9043\u20139052.","DOI":"10.1109\/CVPR52729.2023.00873"},{"key":"2663_CR139","doi-asserted-by":"crossref","unstructured":"\u0160lapak, E., Pardo, E., Dopiriak, M., Maksymyuk, T., & Gazda, J. (2023). Neural radiance fields in the industrial and robotics domain: Applications, research opportunities and use cases. arXiv preprint arXiv:2308.07118.","DOI":"10.1016\/j.rcim.2024.102810"},{"key":"2663_CR140","unstructured":"Smith, C.\u00a0O., Yu, H.-X., Zakharov, S., Durand, F., Tenenbaum, J.\u00a0B., Wu, J., & Sitzmann, V. (2023). 
Unsupervised discovery and composition of object light fields. Transactions on Machine Learning Research."},{"key":"2663_CR141","doi-asserted-by":"crossref","unstructured":"Song, H., Choi, S., Do, H., Lee, C., & Kim, T. (2023). Blending-nerf: Text-driven localized editing in neural radiance fields. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14383\u201314393.","DOI":"10.1109\/ICCV51070.2023.01323"},{"key":"2663_CR142","doi-asserted-by":"crossref","unstructured":"Song, S., Lichtenberg, S.\u00a0P., & Xiao, J. (2015). Sun rgb-d: A rgb-d scene understanding benchmark suite. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 567\u2013576.","DOI":"10.1109\/CVPR.2015.7298655"},{"key":"2663_CR143","unstructured":"Standley, T., Zamir, A., Chen, D., Guibas, L., Malik, J., & Savarese, S. (2020). Which tasks should be learned together in multi-task learning? In International Conference on Machine Learning, pp. 9120\u20139132. PMLR."},{"key":"2663_CR144","unstructured":"Stelzner, K., Kersting, K., & Kosiorek, A.\u00a0R. (2021). Decomposing 3d scenes into objects via unsupervised volume segmentation. arXiv preprint arXiv:2104.01148."},{"key":"2663_CR145","unstructured":"Straub, J., Whelan, T., Ma, L., Chen, Y., Wijmans, E., Green, S., Engel, J.\u00a0J., Mur-Artal, R., Ren, C., Verma, S. et\u00a0al. (2019). The replica dataset: A digital replica of indoor spaces. arXiv preprint arXiv:1906.05797."},{"key":"2663_CR146","first-page":"12278","volume":"34","author":"S-Y Su","year":"2021","unstructured":"Su, S.-Y., Yu, F., Zollh\u00f6fer, M., & Rhodin, H. (2021). A-nerf: Articulated neural radiance fields for learning human shape, appearance, and pose. Advances in Neural Information Processing Systems, 34, 12278\u201312291.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2663_CR147","doi-asserted-by":"crossref","unstructured":"Sun, C., Sun, M., & Chen, H.-T. (2022a). 
Direct voxel grid optimization: Super-fast convergence for radiance fields reconstruction. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5459\u20135469.","DOI":"10.1109\/CVPR52688.2022.00538"},{"key":"2663_CR148","doi-asserted-by":"crossref","unstructured":"Sun, J., Wang, X., Zhang, Y., Li, X., Zhang, Q., Liu, Y., & Wang, J. (2022b). Fenerf: Face editing in neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7672\u20137682.","DOI":"10.1109\/CVPR52688.2022.00752"},{"key":"2663_CR149","doi-asserted-by":"crossref","unstructured":"Sun, P., Kretzschmar, H., Dotiwalla, X., Chouard, A., Patnaik, V., Tsui, P., Guo, J., Zhou, Y., Chai, Y., Caine, B. et\u00a0al. (2020). Scalability in perception for autonomous driving: Waymo open dataset. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2446\u20132454.","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"2663_CR150","doi-asserted-by":"crossref","unstructured":"Sun, T., Segu, M., Postels, J., Wang, Y., Van\u00a0Gool, L., Schiele, B., Tombari, F., & Yu, F. (2022c). Shift: A synthetic driving dataset for continuous multi-task domain adaptation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21371\u201321382.","DOI":"10.1109\/CVPR52688.2022.02068"},{"key":"2663_CR151","doi-asserted-by":"crossref","unstructured":"Suvorov, R., Logacheva, E., Mashikhin, A., Remizova, A., Ashukha, A., Silvestrov, A., Kong, N., Goka, H., Park, K., & Lempitsky, V. (2022). Resolution-robust large mask inpainting with fourier convolutions. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp. 2149\u20132159.","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"2663_CR152","doi-asserted-by":"crossref","unstructured":"Tang, S., Pei, W., Tao, X., Jia, T., Lu, G., & Tai, Y.-W. (2023). 
Scene-generalizable interactive segmentation of radiance fields. In Proceedings of the 31st ACM International Conference on Multimedia, pp. 6744\u20136755.","DOI":"10.1145\/3581783.3612246"},{"key":"2663_CR153","doi-asserted-by":"crossref","unstructured":"Tewari, A., Thies, J., Mildenhall, B., Srinivasan, P., Tretschk, E., Yifan, W., Lassner, C., Sitzmann, V., Martin-Brualla, R., Lombardi, S. et\u00a0al. (2022). Advances in neural rendering. In Computer Graphics Forum, vol.\u00a041, pp. 703\u2013735. Wiley Online Library.","DOI":"10.1111\/cgf.14507"},{"key":"2663_CR154","unstructured":"Tsagkas, N., Mac\u00a0Aodha, O., & Lu, C.\u00a0X. (2023). Vl-fields: Towards language-grounded neural implicit spatial representations. arXiv preprint arXiv:2305.12427."},{"key":"2663_CR155","doi-asserted-by":"crossref","unstructured":"Tschernezki, V., Larlus, D., & Vedaldi, A. (2021). Neuraldiff: Segmenting 3d objects that move in egocentric videos. In 2021 International Conference on 3D Vision (3DV), pp. 910\u2013919. IEEE.","DOI":"10.1109\/3DV53792.2021.00099"},{"key":"2663_CR156","doi-asserted-by":"crossref","unstructured":"Tschernezki, V., Laina, I., Larlus, D., & Vedaldi, A. (2022). Neural feature fusion fields: 3d distillation of self-supervised 2d image representations. In 2022 International Conference on 3D Vision (3DV), pp. 443\u2013453. IEEE.","DOI":"10.1109\/3DV57658.2022.00056"},{"key":"2663_CR157","doi-asserted-by":"crossref","unstructured":"Turki, H., Zhang, J.\u00a0Y., Ferroni, F., & Ramanan, D. (2023). Suds: Scalable urban dynamic scenes. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12375\u201312385.","DOI":"10.1109\/CVPR52729.2023.01191"},{"issue":"7","key":"2663_CR158","first-page":"3614","volume":"44","author":"S Vandenhende","year":"2021","unstructured":"Vandenhende, S., Georgoulis, S., Van Gansbeke, W., Proesmans, M., Dai, D., & Van Gool, L. (2021). Multi-task learning for dense prediction tasks: A survey. 
IEEE transactions on pattern analysis and machine intelligence, 44(7), 3614\u20133633.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"2663_CR159","unstructured":"Vora, S., Radwan, N., Greff, K., Meyer, H., Genova, K., Sajjadi, M.\u00a0S.\u00a0M., Pot, E., Tagliasacchi, A., & Duckworth, D. (2022). NeSF: Neural semantic fields for generalizable semantic segmentation of 3d scenes. Transactions on Machine Learning Research. ISSN 2835-8856."},{"key":"2663_CR160","doi-asserted-by":"crossref","unstructured":"Wang, C., Chai, M., He, M., Chen, D., & Liao, J. (2022). Clip-nerf: Text-and-image driven manipulation of neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3835\u20133844.","DOI":"10.1109\/CVPR52688.2022.00381"},{"key":"2663_CR161","doi-asserted-by":"crossref","unstructured":"Wang, C., Jiang, R., Chai, M., He, M., Chen, D., & Liao, J. (2023a). Nerf-art: Text-driven neural radiance fields stylization. IEEE Transactions on Visualization and Computer Graphics.","DOI":"10.1109\/TVCG.2023.3283400"},{"key":"2663_CR162","doi-asserted-by":"crossref","unstructured":"Wang, D., Zhang, T., Abboud, A., & S\u00fcsstrunk, S. (2023b). Inpaintnerf360: Text-guided 3d inpainting on unbounded neural radiance fields. arXiv preprint arXiv:2305.15094.","DOI":"10.1109\/CVPR52733.2024.01205"},{"key":"2663_CR163","doi-asserted-by":"crossref","unstructured":"Wang, F., Louys, A., Piasco, N., Bennehar, M., Rold\u00e3o, L., & Tsishkou, D. (2024). Planerf: Svd unsupervised 3d plane regularization for nerf large-scale scene reconstruction.","DOI":"10.1109\/3DV62453.2024.00114"},{"key":"2663_CR164","doi-asserted-by":"crossref","unstructured":"Weder, S., Garcia-Hernando, G., Monszpart, A., Pollefeys, M., Brostow, G.\u00a0J., Firman, M., & Vicente, S. (2023). Removing objects from neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
16528\u201316538.","DOI":"10.1109\/CVPR52729.2023.01586"},{"key":"2663_CR165","unstructured":"Wei, X., Zhang, R., Wu, J., Liu, J., Lu, M., Guo, Y., & Zhang, S. (2023). Noc: High-quality neural object cloning with 3d lifting of segment anything. arXiv preprint arXiv:2309.12790."},{"key":"2663_CR166","doi-asserted-by":"crossref","unstructured":"Weng, C.-Y., Curless, B., Srinivasan, P.\u00a0P., Barron, J.\u00a0T., & Kemelmacher-Shlizerman, I. (2022). Humannerf: Free-viewpoint rendering of moving people from monocular video. In Proceedings of the IEEE\/CVF conference on computer vision and pattern Recognition, pp. 16210\u201316220.","DOI":"10.1109\/CVPR52688.2022.01573"},{"key":"2663_CR167","doi-asserted-by":"crossref","unstructured":"Wimbauer, F., Yang, N., Rupprecht, C., & Cremers, D. (2023). Behind the scenes: Density fields for single view reconstruction. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9076\u20139086.","DOI":"10.1109\/CVPR52729.2023.00876"},{"key":"2663_CR168","doi-asserted-by":"crossref","unstructured":"Wong, Y.-S., & Mitra, N.\u00a0J. (2023). Factored neural representation for scene understanding. In Computer Graphics Forum, pp. e14911. Wiley Online Library.","DOI":"10.1111\/cgf.14911"},{"key":"2663_CR169","doi-asserted-by":"crossref","unstructured":"Wu, T., Zhang, J., Fu, X., Wang, Y., Ren, J., Pan, L., Wu, W., Yang, L., Wang, J., Qian, C., et\u00a0al. (2023a). Omniobject3d: Large-vocabulary 3d object dataset for realistic perception, reconstruction and generation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 803\u2013814.","DOI":"10.1109\/CVPR52729.2023.00084"},{"key":"2663_CR170","unstructured":"Wu, T.\u00a0W., Zhong, F., Tagliasacchi, A., Cole, F., & Oztireli, C. (2022). D$$^2$$nerf: Self-supervised decoupling of dynamic and static objects from a monocular video. 
In Advances in Neural Information Processing Systems."},{"key":"2663_CR171","doi-asserted-by":"crossref","unstructured":"Wu, Z., Liu, T., Luo, L., Zhong, Z., Chen, J., Xiao, H., Hou, C., Lou, H., Chen, Y., Yang, R., et\u00a0al. (2023b). Mars: An instance-aware, modular and realistic simulator for autonomous driving. arXiv preprint arXiv:2307.15058.","DOI":"10.1007\/978-981-99-8850-1_1"},{"issue":"4","key":"2663_CR172","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3626193","volume":"56","author":"W Xia","year":"2023","unstructured":"Xia, W., & Xue, J.-H. (2023). A survey on deep generative 3d-aware image synthesis. ACM Computing Surveys, 56(4), 1\u201334.","journal-title":"ACM Computing Surveys"},{"key":"2663_CR173","doi-asserted-by":"crossref","unstructured":"Xiang, J., Yang, J., Deng, Y., & Tong, X. (2023). Gram-hd: 3d-consistent image generation at high resolution with generative radiance manifolds. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2195\u20132205.","DOI":"10.1109\/ICCV51070.2023.00209"},{"key":"2663_CR174","doi-asserted-by":"crossref","unstructured":"Xiangli, Y., Xu, L., Pan, X., Zhao, N., Dai, B., & Lin, D. (2023). Assetfield: Assets mining and reconfiguration in ground feature plane representation. arXiv preprint arXiv:2303.13953.","DOI":"10.1109\/ICCV51070.2023.00301"},{"key":"2663_CR175","doi-asserted-by":"crossref","unstructured":"Xie, C., Park, K., Martin-Brualla, R., & Brown, M. (2021). Fig-nerf: Figure-ground neural radiance fields for 3d object category modelling. In 2021 International Conference on 3D Vision (3DV), pp. 962\u2013971. IEEE.","DOI":"10.1109\/3DV53792.2021.00104"},{"key":"2663_CR176","doi-asserted-by":"crossref","unstructured":"Xie, Y., Takikawa, T., Saito, S., Litany, O., Yan, S., Khan, N., Tombari, F., Tompkin, J., Sitzmann, V., & Sridhar, S. (2022). Neural fields in visual computing and beyond. In Computer Graphics Forum, vol.\u00a041, pp. 641\u2013676. 
Wiley Online Library.","DOI":"10.1111\/cgf.14505"},{"key":"2663_CR177","doi-asserted-by":"crossref","unstructured":"Xu, C., Wu, B., Hou, J., Tsai, S., Li, R., Wang, J., Zhan, W., He, Z., Vajda, P., Keutzer, K., et\u00a0al. (2023a). Nerf-det: Learning geometry-aware volumetric representation for multi-view 3d object detection. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 23320\u201323330.","DOI":"10.1109\/ICCV51070.2023.02131"},{"key":"2663_CR178","doi-asserted-by":"crossref","unstructured":"Xu, D., Jiang, Y., Wang, P., Fan, Z., Shi, H., & Wang, Z. (2022). Sinnerf: Training neural radiance fields on complex scenes from a single image. In European Conference on Computer Vision, pp. 736\u2013753. Springer.","DOI":"10.1007\/978-3-031-20047-2_42"},{"key":"2663_CR179","doi-asserted-by":"crossref","unstructured":"Xu, J., Peng, L., Cheng, H., Li, H., Qian, W., Li, K., Wang, W., & Cai, D. (2023b). Mononerd: Nerf-like representations for monocular 3d object detection. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6814\u20136824.","DOI":"10.1109\/ICCV51070.2023.00627"},{"key":"2663_CR180","doi-asserted-by":"crossref","unstructured":"Xu, X., Yang, Y., Mo, K., Pan, B., Yi, L., & Guibas, L. (2023c). Jacobinerf: Nerf shaping with mutual information gradients. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16498\u201316507.","DOI":"10.1109\/CVPR52729.2023.01583"},{"key":"2663_CR181","doi-asserted-by":"crossref","unstructured":"Xu, Y., Chai, M., Shi, Z., Peng, S., Skorokhodov, I., Siarohin, A., Yang, C., Shen, Y., Lee, H.-Y., Zhou, B. et\u00a0al. (2023d). Discoscene: Spatially disentangled generative radiance fields for controllable 3d-aware scene synthesis. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
4402\u20134412.","DOI":"10.1109\/CVPR52729.2023.00428"},{"key":"2663_CR182","doi-asserted-by":"crossref","unstructured":"Xue, Y., Li, Y., Singh, K.\u00a0K., & Lee, Y.\u00a0J. (2022). Giraffe hd: A high-resolution 3d-aware generative model. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18440\u201318449.","DOI":"10.1109\/CVPR52688.2022.01789"},{"key":"2663_CR183","doi-asserted-by":"crossref","unstructured":"Yadav, K., Ramrakhya, R., Ramakrishnan, S.\u00a0K., Gervet, T., Turner, J., Gokaslan, A., Maestre, N., Chang, A.\u00a0X., Batra, D., Savva, M., et\u00a0al. (2023). Habitat-matterport 3d semantics dataset. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4927\u20134936.","DOI":"10.1109\/CVPR52729.2023.00477"},{"key":"2663_CR184","doi-asserted-by":"crossref","unstructured":"Yang, B., Zhang, Y., Xu, Y., Li, Y., Zhou, H., Bao, H., Zhang, G., & Cui, Z. (2021). Learning object-compositional neural radiance field for editable scene rendering. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13779\u201313788.","DOI":"10.1109\/ICCV48922.2021.01352"},{"issue":"4","key":"2663_CR185","first-page":"1","volume":"41","author":"B Yang","year":"2022","unstructured":"Yang, B., Zhang, Y., Li, Y., Cui, Z., Fanello, S., Bao, H., & Zhang, G. (2022). Neural rendering in a room: Amodal 3d understanding and free-viewpoint rendering for the closed scene composed of pre-captured objects. ACM Transactions on Graphics (TOG), 41(4), 1\u201310.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2663_CR186","unstructured":"Yang, J., Ivanovic, B., Litany, O., Weng, X., Kim, S.\u00a0W., Li, B., Che, T., Xu, D., Fidler, S., Pavone, M. et\u00a0al. (2023a). Emernerf: Emergent spatial-temporal scene decomposition via self-supervision. 
arXiv preprint arXiv:2311.02077."},{"key":"2663_CR187","doi-asserted-by":"crossref","unstructured":"Yang, Y., Yang, Y., Guo, H., Xiong, R., Wang, Y., & Liao, Y. (2023b). Urbangiraffe: Representing urban scenes as compositional generative neural feature fields. arXiv preprint arXiv:2303.14167.","DOI":"10.1109\/ICCV51070.2023.00844"},{"key":"2663_CR188","doi-asserted-by":"crossref","unstructured":"Ye, B., Liu, S., Li, X., & Yang, M.-H. (2023a). Self-supervised super-plane for neural 3d reconstruction. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21415\u201321424.","DOI":"10.1109\/CVPR52729.2023.02051"},{"key":"2663_CR189","doi-asserted-by":"crossref","unstructured":"Ye, J., Wang, N., & Wang, X. (2023b). Featurenerf: Learning generalizable nerfs by distilling pre-trained vision foundation models. In Proceedings of the IEEE\/CVF International Conference on Computer Vision.","DOI":"10.1109\/ICCV51070.2023.00823"},{"key":"2663_CR190","doi-asserted-by":"crossref","unstructured":"Ye, W., Chen, S., Bao, C., Bao, H., Pollefeys, M., Cui, Z., & Zhang, G. (2023c). Intrinsicnerf: Learning intrinsic neural radiance fields for editable novel view synthesis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 339\u2013351.","DOI":"10.1109\/ICCV51070.2023.00038"},{"key":"2663_CR191","doi-asserted-by":"crossref","unstructured":"Yeshwanth, C., Liu, Y.-C., Nie\u00dfner, M., & Dai, A. (2023). Scannet++: A high-fidelity dataset of 3d indoor scenes. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12\u201322.","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"2663_CR192","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., & Kanazawa, A. (2021). pixelnerf: Neural radiance fields from one or few images. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
4578\u20134587.","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"2663_CR193","unstructured":"Yu, H.-X., Guibas, L. J., & Wu, J. (2022). Unsupervised discovery of object radiance fields."},{"key":"2663_CR194","doi-asserted-by":"crossref","unstructured":"Yuan, W., Lv, Z., Schmidt, T., & Lovegrove, S. (2021). Star: Self-supervised tracking and reconstruction of rigid objects in motion with neural rendering. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13144\u201313152.","DOI":"10.1109\/CVPR46437.2021.01294"},{"key":"2663_CR195","doi-asserted-by":"crossref","unstructured":"Zamir, A.\u00a0R., Sax, A., Shen, W., Guibas, L.\u00a0J., Malik, J., & Savarese, S. (2018). Taskonomy: Disentangling task transfer learning. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3712\u20133722.","DOI":"10.1109\/CVPR.2018.00391"},{"key":"2663_CR196","unstructured":"Zarzar, J., Rojas, S., Giancola, S., & Ghanem, B. (2022). Segnerf: 3d part segmentation with neural radiance fields. arXiv preprint arXiv:2211.11215."},{"key":"2663_CR197","unstructured":"Ze, Y., Yan, G., Wu, Y.-H., Macaluso, A., Ge, Y., Ye, J., Hansen, N., Li, L.\u00a0E. & Wang, X. (2023). Gnfactor: Multi-task real robot learning with generalizable neural feature fields. In Conference on Robot Learning, pp. 284\u2013301. PMLR."},{"issue":"4","key":"2663_CR198","first-page":"1","volume":"40","author":"J Zhang","year":"2021","unstructured":"Zhang, J., Liu, X., Ye, X., Zhao, F., Zhang, Y., Wu, M., Zhang, Y., Xu, L., & Yu, J. (2021). Editable free-viewpoint video using a layered neural representation. ACM Transactions on Graphics (TOG), 40(4), 1\u201318.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2663_CR199","doi-asserted-by":"crossref","unstructured":"Zhang, J., Li, X., Wan, Z., Wang, C., & Liao, J. (2022a). Fdnerf: Few-shot dynamic neural radiance fields for face reconstruction and expression editing. 
In SIGGRAPH Asia 2022 Conference Papers, pp. 1\u20139.","DOI":"10.1145\/3550469.3555404"},{"key":"2663_CR200","doi-asserted-by":"crossref","unstructured":"Zhang, J., Sangineto, E., Tang, H., Siarohin, A., Zhong, Z., Sebe, N., & Wang, W. (2022b). 3d-aware semantic-guided generative model for human synthesis. In European Conference on Computer Vision, pp. 339\u2013356. Springer.","DOI":"10.1007\/978-3-031-19784-0_20"},{"key":"2663_CR201","doi-asserted-by":"crossref","unstructured":"Zhang, M., Zheng, S., Bao, Z., Hebert, M., & Wang, Y.-X. (2023a). Beyond rgb: Scene-property synthesis with neural radiance fields. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 795\u2013805.","DOI":"10.1109\/WACV56688.2023.00086"},{"key":"2663_CR202","doi-asserted-by":"crossref","unstructured":"Zhang, X., Kundu, A., Funkhouser, T., Guibas, L., Su, H. & Genova, K. (2023b). Nerflets: Local radiance fields for efficient structure-aware 3d scene representation from 2d supervision. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8274\u20138284.","DOI":"10.1109\/CVPR52729.2023.00800"},{"key":"2663_CR203","doi-asserted-by":"crossref","unstructured":"Zheng, S., Bao, Z., Hebert, M., & Wang, Y.-X. (2023). Multi-task view synthesis with neural radiance fields. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 21538\u201321549.","DOI":"10.1109\/ICCV51070.2023.01969"},{"key":"2663_CR204","doi-asserted-by":"crossref","unstructured":"Zhi, S., Laidlow, T., Leutenegger, S., & Davison, A.\u00a0J. (2021a). In-place scene labelling and understanding with implicit scene representation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15838\u201315847.","DOI":"10.1109\/ICCV48922.2021.01554"},{"key":"2663_CR205","unstructured":"Zhi, S., Sucar, E., Mouton, A., Haughton, I., Laidlow, T., & Davison, A.\u00a0J. (2021b). ilabel: Interactive neural scene labelling. 
arXiv preprint arXiv:2111.14637."},{"key":"2663_CR206","doi-asserted-by":"crossref","unstructured":"Zhu, C., Wan, R., Tang, Y., & Shi, B. (2023a). Occlusion-free scene recovery via neural radiance fields. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20722\u201320731.","DOI":"10.1109\/CVPR52729.2023.01985"},{"key":"2663_CR207","doi-asserted-by":"crossref","unstructured":"Zhu, F., Guo, S., Song, L., Xu, K., Hu, J. et\u00a0al. (2023b). Deep review and analysis of recent nerfs. APSIPA Transactions on Signal and Information Processing, 12(1).","DOI":"10.1561\/116.00000162"},{"key":"2663_CR208","doi-asserted-by":"crossref","unstructured":"Zhuang, Y., Zhu, H., Sun, X., & Cao, X. (2022). Mofanerf: Morphable facial neural radiance field. In European Conference on Computer Vision, pp. 268\u2013285. Springer.","DOI":"10.1007\/978-3-031-20062-5_16"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02663-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02663-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02663-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T08:42:19Z","timestamp":1774600939000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02663-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,6]]},"references-count":208,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["2663"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02663-5","relation":{},
"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,6]]},"assertion":[{"value":"17 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"109"}}