{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T12:28:59Z","timestamp":1772454539083,"version":"3.50.1"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031728891","type":"print"},{"value":"9783031728907","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72890-7_3","type":"book-chapter","created":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T19:46:24Z","timestamp":1733514384000},"page":"37-53","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["SUP-NeRF: A Streamlined Unification of\u00a0Pose Estimation and\u00a0NeRF for\u00a0Monocular 3D Object Reconstruction"],"prefix":"10.1007","author":[{"given":"Yuliang","family":"Guo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abhinav","family":"Kumar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruoyu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyu","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liu","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,7]]},"reference":[{"key":"3_CR1","unstructured":"Batra, D., et al.: Rearrangement: a challenge for embodied AI. CoRR arXiv:2011.01975 (2020)"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Bian, W., Wang, Z., Li, K., Bian, J.: Nope-NeRF: optimising neural radiance field with no pose prior. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00405"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Chen, H., Manhardt, F., Navab, N., Busam, B.: Texpose: neural texture learning for self-supervised 6d object pose estimation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00469"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y., Lee, G.H.: DBARF: deep bundle-adjusting generalizable neural radiance fields. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00011"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? the KITTI vision benchmark suite. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"3_CR7","unstructured":"Girshick, R., Radosavovic, I., Gkioxari, G., Doll\u00e1r, P., He, K.: Detectron (2018). https:\/\/github.com\/facebookresearch\/detectron"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Johnson, J., Malik, J.: Mesh R-CNN. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00988"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Henderson, P., Tsiminaki, V., Lampert, C.H.: Leveraging 2D data to learn textured 3D mesh generation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00752"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Hoda\u0148, T., et al.: BOP: Benchmark for 6D object pose estimation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01249-6_2"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Insafutdinov, E., Campbell, D., Henriques, J.F., Vedaldi, A.: SNeS: learning probably symmetric neural surfaces from incomplete data. In: Avidan, S., Brostow, G.J., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV, pp. 367\u2013383 (2022)","DOI":"10.1007\/978-3-031-19824-3_22"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Jang, W., Agapito, L.: CodeNeRF: disentangled neural radiance fields for object categories. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01271"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Tulsiani, S., Efros, A.A., Malik, J.: Learning category-specific mesh reconstruction from image collections. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV (2018)","DOI":"10.1007\/978-3-030-01267-0_23"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Kehl, W., Manhardt, F., Tombari, F., Ilic, S., Navab, N.: SSD-6D: making RGB-based 3D detection and 6d pose estimation great again. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.169"},{"key":"3_CR15","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3592433","volume":"42","author":"B Kerbl","year":"2023","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42, 139\u2013140 (2023)","journal-title":"ACM Trans. Graph."},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Kumar, A., Brazil, G., Corona, E., Parchami, A., Liu, X.: DEVIANT: depth equivariant network for monocular 3D object detection. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20077-9_39"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Kumar, A., Guo, Y., Huang, X., Ren, L., Liu, X.: SeaBird: segmentation in bird\u2019s view with dice loss improves monocular 3D detection of large objects. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00978"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Kundu, A., et al.: Panoptic neural fields: a semantic object-aware neural scene representation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01253"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Kundu, A., Li, Y., Rehg, J.M.: 3D-RCNN: instance-level 3D object reconstruction via render-and-compare. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00375"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Li, F., Yu, H., Shugurov, I., Busam, B., Yang, S., Ilic, S.: NeRF-Pose: a first-reconstruct-then-regress approach for weakly-supervised 6D object pose estimation. CoRR arXiv:2203.04802 (2022)","DOI":"10.1109\/ICCVW60793.2023.00226"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, G., Ji, X., Xiang, Y., Fox, D.: DeepIM: deep iterative matching for 6D pose estimation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV (2018)","DOI":"10.1007\/978-3-030-01231-1_42"},{"key":"3_CR22","unstructured":"Lin, Y., Florence, P., Barron, J.T., Rodriguez, A., Isola, P., Lin, T.: INeRF: inverting neural radiance fields for pose estimation. In: IROS (2021)"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Lipson, L., Teed, Z., Goyal, A., Deng, J.: Coupled iterative refinement for 6D multi-object pose estimation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00661"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Lu, Y., et al.: Geometry uncertainty projection network for monocular 3D object detection. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Merrill, N., et al.: Symmetry and uncertainty-aware object SLAM for 6D of object pose estimation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01448"},{"key":"3_CR26","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65, 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Mousavian, A., Anguelov, D., Flynn, J., Kosecka, J.: 3D bounding box estimation using deep learning and geometry. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.597"},{"key":"3_CR28","doi-asserted-by":"crossref","unstructured":"M\u00fcller, N., Simonelli, A., Porzi, L., Bul\u00f2, S.R., Nie\u00dfner, M., Kontschieder, P.: AutoRF: learning 3D object radiance fields from single view observations. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00394"},{"key":"3_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. 41, 1\u20135 (2022)","journal-title":"ACM Trans. Graph."},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Park, D., Ambrus, R., Guizilini, V., Li, J., Gaidon, A.: Is pseudo-lidar needed for monocular 3D object detection? In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00313"},{"key":"3_CR31","doi-asserted-by":"crossref","unstructured":"Park, K., Patten, T., Vincze, M.: Pix2Pose: Pixel-wise coordinate regression of objects for 6D pose estimation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00776"},{"key":"3_CR32","doi-asserted-by":"crossref","unstructured":"Pavllo, D., Tan, D.J., Rakotosaona, M.J., Tombari, F.: Shape, pose, and appearance from a single image via bootstrapped radiance field inversion. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00427"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Rad, M., Lepetit, V.: BB8: a scalable, accurate, robust to partial occlusion method for predicting the 3D poses of challenging objects without using depth. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.413"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bul\u00f2, S.R., Porzi, L., Lopez-Antequera, M., Kontschieder, P.: Disentangling monocular 3D object detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"3_CR35","unstructured":"Sol\u00e0, J., Deray, J., Atchuthan, D.: A micro lie theory for state estimation in robotics. CoRR arXiv:1812.01537 (2018)"},{"key":"3_CR36","doi-asserted-by":"crossref","unstructured":"Sun, P., et al.: Scalability in perception for autonomous driving: Waymo open dataset. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Tekin, B., Sinha, S.N., Fua, P.: Real-time seamless single shot 6D object pose prediction. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00038"},{"key":"3_CR38","doi-asserted-by":"crossref","unstructured":"Tian, S., et al.: Multi-object manipulation via object-centric neural scattering functions. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00871"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Wang, H., Sridhar, S., Huang, J., Valentin, J., Song, S., Guibas, L.J.: Normalized object coordinate space for category-level 6D object pose and size estimation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00275"},{"key":"3_CR40","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhu, X., Pang, J., Lin, D.: FCOS3D: fully convolutional one-stage monocular 3D object detection. In: ICCVW (2021)","DOI":"10.1109\/ICCVW54120.2021.00107"},{"key":"3_CR41","doi-asserted-by":"crossref","unstructured":"Weihs, L., Deitke, M., Kembhavi, A., Mottaghi, R.: Visual room rearrangement. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00586"},{"key":"3_CR42","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Schmidt, T., Narayanan, V., Fox, D.: PoseCNN: a convolutional neural network for 6D object pose estimation in cluttered scenes. In: RSS (2018)","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"3_CR43","doi-asserted-by":"crossref","unstructured":"Yang, B., et al.: Learning object-compositional neural radiance field for editable scene rendering. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01352"},{"key":"3_CR44","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., Kanazawa, A.: pixelNeRF: neural radiance fields from one or few images. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"3_CR45","doi-asserted-by":"crossref","unstructured":"Yu, H., Wu, J., Yi, L.: Rotationally equivariant 3D object detection. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00151"},{"key":"3_CR46","unstructured":"Zakharov, S., et al.: Single-shot scene reconstruction. In: CoRL (2021)"},{"key":"3_CR47","doi-asserted-by":"crossref","unstructured":"Zakharov, S., Kehl, W., Bhargava, A., Gaidon, A.: Autolabeling 3D objects with differentiable rendering of SDF shape priors. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01224"},{"key":"3_CR48","doi-asserted-by":"crossref","unstructured":"Zakharov, S., Shugurov, I., Ilic, S.: DPOD: 6D pose object detector and refiner. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00203"},{"key":"3_CR49","doi-asserted-by":"crossref","unstructured":"Zhou, A., Kim, M.J., Wang, L., Florence, P., Finn, C.: Nerf in the palm of your hand: corrective augmentation for robotics via novel-view synthesis. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01717"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72890-7_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T20:02:59Z","timestamp":1733515379000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72890-7_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,7]]},"ISBN":["9783031728891","9783031728907"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72890-7_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,7]]},"assertion":[{"value":"7 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}