{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T17:21:53Z","timestamp":1781630513243,"version":"3.54.5"},"publisher-location":"Cham","reference-count":62,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732256","type":"print"},{"value":"9783031732263","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73226-3_24","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:02:57Z","timestamp":1730386977000},"page":"414-431","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":35,"title":["FreeZe: Training-Free Zero-Shot 6D Pose Estimation with\u00a0Geometric and\u00a0Vision Foundation Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7775-924X","authenticated-orcid":false,"given":"Andrea","family":"Caraffa","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4887-2038","authenticated-orcid":false,"given":"Davide","family":"Boscaini","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7028-2852","authenticated-orcid":false,"given":"Amir","family":"Hamza","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9769-1279","authenticated-orcid":false,"given":"Fabio","family":"Poiesi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"24_CR1","unstructured":"6D localization of unseen objects - Core datasets. https:\/\/bop.felk.cvut.cz\/leaderboards\/pose-estimation-unseen-bop23\/core-datasets\/. Accessed 1 Mar 2024"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Ao, S., Hu, Q., Yang, B., Markham, A., Guo, Y.: SpinNet: learning a general surface descriptor for 3D point cloud registration. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01158"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Ausserlechner, P., Haberger, D., Thalhammer, S., Weibel, J.B., Vincze, M.: ZS6D: zero-shot 6D object pose estimation using vision transformers. arXiv:2309.11986 (2023)","DOI":"10.1109\/ICRA57147.2024.10611464"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Brachmann, E., Krull, A., Michel, F., Gumhold, S., Shotton, J., Rother, C.: Learning 6D object pose estimation using 3D object coordinates. In: ECCV (2014)","DOI":"10.1007\/978-3-319-10605-2_35"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Castro, P., Kim, T.K.: PoseMatcher: one-shot 6D object pose estimation by deep feature matching. In: ICCV-W (2023)","DOI":"10.1109\/ICCVW60793.2023.00229"},{"key":"24_CR6","unstructured":"Chen, J., Sun, M., Bao, T., Zhao, R., Wu, L., He, Z.: ZeroPose: CAD-model-based zero-shot pose estimation. arXiv:2305.17934 (2023)"},{"issue":"3","key":"24_CR7","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/0262-8856(92)90066-C","volume":"10","author":"Y Chen","year":"1992","unstructured":"Chen, Y., Medioni, G.: Object modelling by registration of multiple range images. Image Vis. Comput. 10(3), 145\u2013155 (1992)","journal-title":"Image Vis. Comput."},{"key":"24_CR8","unstructured":"Choi, S., Zhou, Q.Y., Koltun, V.: Robust reconstruction of indoor scenes. In: CVPR (2015)"},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Choy, C., Park, J., Koltun, V.: Fully convolutional geometric features. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00905"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Corsetti, J., Boscaini, D., Poiesi, F.: Revisiting fully convolutional geometric features for object 6D pose estimation. In: ICCV-W (2023)","DOI":"10.1109\/ICCVW60793.2023.00224"},{"key":"24_CR11","unstructured":"Di\u00a0Felice, F., et al.: Zero123-6D: zero-shot novel view synthesis for RGB category-level 6D pose estimation. arXiv:2403.14279 (2024)"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Doumanoglou, A., Kouskouridas, R., Malassiotis, S., Kim, T.K.: Recovering 6D object pose and predicting next-best-view in the crowd. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.390"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Drost, B., Ulrich, M., Bergmann, P., Hartinger, P., Steger, C.: Introducing MVTec ITODD - a dataset for 3D object recognition in industry. In: ICCV-W (2017)","DOI":"10.1109\/ICCVW.2017.257"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Drost, B., Ulrich, M., Navab, N., Ilic, S.: Model globally, match locally: efficient and robust 3D object recognition. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5540108"},{"key":"24_CR15","unstructured":"Gao, N., Ngo, V.A., Ziesche, H., Neumann, G.: SA6D: self-adaptive few-shot 6D pose estimator for novel and occluded objects. In: CoRL (2023)"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Girdhar, R., et al.: ImageBind: one embedding space to bind them all. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"24_CR17","unstructured":"Goodwin, W., Havoutis, I., Posner, I.: You only look at one: category-level object representations for pose estimation from a single example. In: CoRL (2023)"},{"key":"24_CR18","doi-asserted-by":"crossref","unstructured":"Goodwin, W., Vaze, S., Havoutis, I., Posner, I.: Zero-shot category-level object pose estimation. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19842-7_30"},{"key":"24_CR19","unstructured":"He, X., Sun, J., Wang, Y., Huang, D., Bao, H., Zhou, X.: OnePose++: keypoint-free one-shot object pose estimation without CAD models. In: NeurIPS (2022)"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"He, Y., Huang, H., Fan, H., Chen, Q., Sun, J.: FFB6D: a full flow bidirectional fusion network for 6D pose estimation. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00302"},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"He, Y., Wang, Y., Fan, H., Sun, J., Chen, Q.: FS6D: few-shot 6D pose estimation of novel objects. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00669"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Hodan, T., Haluza, P., Obdrzalek, S., Matas, J., Lourakis, M., Zabulis, X.: T-LESS: an RGB-D dataset for 6D pose estimation of texture-less objects. In: WACV (2017)","DOI":"10.1109\/WACV.2017.103"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Hodan, T., et al.: BOP: benchmark for 6D object pose estimation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01249-6_2"},{"key":"24_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.119838","volume":"223","author":"S Hoque","year":"2023","unstructured":"Hoque, S., Xu, S., Maiti, A., Wei, Y., Arafat, M.Y.: Deep learning for 6D pose estimation of objects - a case study for autonomous driving. Expert Syst. Appl. 223, 119838 (2023)","journal-title":"Expert Syst. Appl."},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Huang, S., Gojcic, Z., Usvyatsov, M., Wieser, A., Schindler, K.: PREDATOR: registration of 3D point clouds with low overlap. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00425"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Kaskman, R., Zakharov, S., Shugurov, I., Ilic, S.: HomebrewedDB: RGB-D dataset for 6D pose estimation of 3D objects. In: ICCV-W (2019)","DOI":"10.1109\/ICCVW.2019.00338"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"24_CR28","unstructured":"Labb\u00e9, Y., et al.: MegaPose: 6D pose estimation of novel objects via render & compare. arXiv:2212.06870 (2022)"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Li, G., et al.: SD-pose: structural discrepancy aware category-level 6D object pose estimation. In: WACV (2023)","DOI":"10.1109\/WACV56688.2023.00564"},{"issue":"7","key":"24_CR30","doi-asserted-by":"publisher","first-page":"3368","DOI":"10.1109\/TVCG.2022.3160005","volume":"29","author":"L Li","year":"2022","unstructured":"Li, L., Fu, H., Ovsjanikov, M.: WSDesc: weakly supervised 3D local descriptor learning for point cloud registration. IEEE Trans. Vis. Comput. Graph. 29(7), 3368\u20133379 (2022)","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Li, L., Zhu, S., Fu, H., Tan, P., Tai, C.L.: End-to-end learning local multi-view descriptors for 3D point clouds. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00199"},{"key":"24_CR32","doi-asserted-by":"crossref","unstructured":"Li, Z., Wang, G., Ji, X.: CDPN: coordinates-based disentangled pose network for real-time RGB-based 6-DoF object pose estimation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00777"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Lin, J., Liu, L., Lu, D., Jia, K.: SAM-6D: segment anything model meets zero-shot 6D object pose estimation. arXiv:2311.15707 (2023)","DOI":"10.1109\/CVPR52733.2024.02636"},{"key":"24_CR34","doi-asserted-by":"crossref","unstructured":"Lin, J., Wei, Z., Li, Z., Xu, S., Jia, K., Li, Y.: DualPoseNet: category-level 6D object pose and size estimation using dual pose network with refined learning of pose consistency. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00354"},{"issue":"11","key":"24_CR35","doi-asserted-by":"publisher","first-page":"11171","DOI":"10.1109\/TII.2023.3244348","volume":"19","author":"J Liu","year":"2023","unstructured":"Liu, J., Sun, W., Liu, C., Zhang, X., Fu, Q.: Robotic continuous grasping system by shape transformer-guided multi-object category-level 6D pose estimation. IEEE Trans. Ind. Inform. 19(11), 11171\u201311181 (2023)","journal-title":"IEEE Trans. Ind. Inform."},{"key":"24_CR36","unstructured":"van\u00a0der Maaten, L., Hinton, G.: Visualizing high-dimensional data using t-SNE. J. Mach. Learn. Res. (2008)"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Nguyen, V.N., Groueix, T., Salzmann, M., Lepetit, V.: GigaPose: fast and robust novel object pose estimation via one correspondence. arXiv:2311.14155 (2023)","DOI":"10.1109\/CVPR52733.2024.00945"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Nguyen, V.N., Hodan, T., Ponimatkin, G., Groueix, T., Lepetit, V.: CNOS: a strong baseline for CAD-based novel object segmentation. In: ICCV-W (2023)","DOI":"10.1109\/ICCVW60793.2023.00227"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Nguyen, V.N., Hu, Y., Xiao, Y., Salzmann, M., Lepetit, V.: Templates for 3D object pose estimation revisited: generalization to new objects and robustness to occlusions. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00665"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Okorn, B., Gu, Q., Hebert, M., Held, D.: ZePHyR: zero-shot pose hypothesis rating. In: ICRA (2021)","DOI":"10.1109\/ICRA48506.2021.9560874"},{"key":"24_CR41","unstructured":"Oquab, M., et al.: DINOv2: learning robust visual features without supervision. arXiv:2304.07193 (2023)"},{"key":"24_CR42","doi-asserted-by":"crossref","unstructured":"\u00d6rnek, E.P., et al.: FoundPose: unseen object pose estimation with foundation features. arXiv:2311.18809 (2023)","DOI":"10.1007\/978-3-031-73347-5_10"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Peng, S., Liu, Y., Huang, Q., Zhou, X., Bao, H.: PVNet: pixel-wise voting network for 6DoF pose estimation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00469"},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Peng, S., et al.: OpenScene: 3D scene understanding with open vocabularies. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00085"},{"issue":"3","key":"24_CR45","first-page":"3979","volume":"45","author":"F Poiesi","year":"2023","unstructured":"Poiesi, F., Boscaini, D.: Learning general and distinctive 3D local deep descriptors for point cloud registration. IEEE TPAMI 45(3), 3979\u20133985 (2023)","journal-title":"IEEE TPAMI"},{"key":"24_CR46","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: PointNet++: deep hierarchical feature learning on point sets in a metric space. In: NeurIPS (2017)"},{"key":"24_CR47","doi-asserted-by":"crossref","unstructured":"Rad, M., Oberweger, M., Lepetit, V.: Feature mapping for learning fast and accurate 3D pose inference from synthetic images. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00490"},{"key":"24_CR48","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"key":"24_CR49","doi-asserted-by":"crossref","unstructured":"Rusu, R.B., Blodow, N., Beetz, M.: Fast point feature histograms (FPFH) for 3D registration. In: ICRA (2009)","DOI":"10.1109\/ROBOT.2009.5152473"},{"key":"24_CR50","doi-asserted-by":"crossref","unstructured":"Su, Y., Rambach, J., Minaskan, N., Lesur, P., Pagani, A., Stricker, D.: Deep multi-state object pose estimation for augmented reality assembly. In: ISMAR-Adjunct (2019)","DOI":"10.1109\/ISMAR-Adjunct.2019.00-42"},{"key":"24_CR51","doi-asserted-by":"crossref","unstructured":"Sun, J., et al.: OnePose: one-shot object pose estimation without cad models. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00670"},{"key":"24_CR52","doi-asserted-by":"crossref","unstructured":"Sundermeyer, M., et al.: BOP Challenge 2022 on detection, segmentation and pose estimation of specific rigid objects. In: CVPR (2023)","DOI":"10.1109\/CVPRW59228.2023.00279"},{"key":"24_CR53","doi-asserted-by":"crossref","unstructured":"Wang, C., et al.: DenseFusion: 6D object pose estimation by iterative dense fusion. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00346"},{"key":"24_CR54","doi-asserted-by":"crossref","unstructured":"Wen, B., Yang, W., Kautz, J., Birchfield, S.: FoundationPose: unified 6D pose estimation and tracking of novel objects. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01692"},{"key":"24_CR55","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Schmidt, T., Narayanan, V., Fox, D.: PoseCNN: a convolutional neural network for 6D object pose estimation in cluttered scenes. In: Robotic Science and Systems (2018)","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"24_CR56","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1016\/j.patcog.2016.11.019","volume":"65","author":"J Yang","year":"2017","unstructured":"Yang, J., Zhang, Q., Xiao, Y., Cao, Z.G.: TOLDI: an effective and robust approach for 3D local shape description. Pattern Recognit. 65, 175\u2013187 (2017)","journal-title":"Pattern Recognit."},{"key":"24_CR57","doi-asserted-by":"crossref","unstructured":"Zeng, A., Song, S., Nie\u00dfner, M., Fisher, M., Xiao, J., Funkhouser, T.: 3DMatch: learning local geometric descriptors from RGB-D reconstructions. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.29"},{"issue":"2","key":"24_CR58","first-page":"2541","volume":"7","author":"H Zhao","year":"2022","unstructured":"Zhao, H., Zhuang, H., Wang, C., Yang, M.: G3DOA: generalizable 3D descriptor with overlap attention for point cloud registration. RA-L 7(2), 2541\u20132548 (2022)","journal-title":"RA-L"},{"key":"24_CR59","unstructured":"Zhao, X., et al.: Fast segment anything. arXiv:2306.12156 (2023)"},{"key":"24_CR60","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Birdal, T., Deng, H., Tombari, F.: 3D point capsule networks. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00110"},{"key":"24_CR61","doi-asserted-by":"crossref","unstructured":"Zhu, X., et al.: PointCLIPv2: prompting CLIP and GPT for powerful 3D open-world learning. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00249"},{"key":"24_CR62","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2023.102541","volume":"82","author":"C Zhuang","year":"2023","unstructured":"Zhuang, C., Li, S., Ding, H.: Instance segmentation based 6D pose estimation of industrial objects using point clouds for robotic bin-picking. Robot. Comput.-Integr. Manufact. 82, 102541 (2023)","journal-title":"Robot. Comput.-Integr. Manufact."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73226-3_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:17:57Z","timestamp":1730387877000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73226-3_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9783031732256","9783031732263"],"references-count":62,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73226-3_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}