{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:11:51Z","timestamp":1778083911355,"version":"3.51.4"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732256","type":"print"},{"value":"9783031732263","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73226-3_12","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:02:57Z","timestamp":1730386977000},"page":"199-216","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Omni6DPose: A Benchmark and\u00a0Model for\u00a0Universal 6D Object Pose Estimation and\u00a0Tracking"],"prefix":"10.1007","author":[{"given":"Jiyao","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Weiyao","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Mingdong","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Fei","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Zijian","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Dong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"An, B., Geng, Y., Chen, K., Li, X., Dou, Q., Dong, H.: Rgbmanip: monocular image-based robotic manipulation through active object pose estimation (2023)","DOI":"10.1109\/ICRA57147.2024.10610690"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Chang, A., et al.: Matterport3D: learning from RGB-D data in indoor environments. arXiv preprint arXiv:1709.06158 (2017)","DOI":"10.1109\/3DV.2017.00081"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Chen, K., Dou, Q.: SGPA: structure-guided prior adaptation for category-level 6D object pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2773\u20132782 (2021)","DOI":"10.1109\/ICCV48922.2021.00277"},{"key":"12_CR4","unstructured":"Cheng, J., Wu, M., Zhang, R., Zhan, G., Wu, C., Dong, H.: Score-pa: score-based 3D part assembly. arXiv preprint arXiv:2309.04220 (2023)"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Ci, H., et al.: Gfpose: learning 3D human pose prior with gradient fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4800\u20134810 (2023)","DOI":"10.1109\/CVPR52729.2023.00465"},{"key":"12_CR6","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"374","DOI":"10.1007\/978-3-031-19842-7_22","volume-title":"ECCV 2022","author":"Q Dai","year":"2022","unstructured":"Dai, Q., et al.: Domain randomization-enhanced depth simulation and restoration for perceiving and grasping specular and transparent objects. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13699, pp. 374\u2013391. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19842-7_22"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Downs, L., et al.: Google scanned objects: a high-quality dataset of 3D scanned household items. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 2553\u20132560. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"12_CR8","unstructured":"Ester, M., Kriegel, H.P., Sander, J., Xu, X., et\u00a0al.: A density-based algorithm for discovering clusters in large spatial databases with noise. In: KDD, vol.\u00a096, pp. 226\u2013231 (1996)"},{"key":"12_CR9","unstructured":"Fu, Y., Wang, X.: Category-level 6D object pose estimation in the wild: a semi-supervised learning approach and a new dataset. In: Advances in Neural Information Processing Systems (2022)"},{"key":"12_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1007\/978-3-642-33885-4_60","volume-title":"Computer Vision \u2013 ECCV 2012. Workshops and Demonstrations","author":"S Hinterstoisser","year":"2012","unstructured":"Hinterstoisser, S., et al.: Technical demonstration on model based training, detection and pose estimation of texture-less 3D objects in heavily cluttered scenes. In: Fusiello, A., Murino, V., Cucchiara, R. (eds.) ECCV 2012. LNCS, vol. 7585, pp. 593\u2013596. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33885-4_60"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Hoda\u0148, T., Haluza, P., Obdr\u017e\u00e1lek, \u0160., Matas, J., Lourakis, M., Zabulis, X.: T-LESS: an RGB-D dataset for 6D pose estimation of texture-less objects. In: IEEE Winter Conference on Applications of Computer Vision (WACV) (2017)","DOI":"10.1109\/WACV.2017.103"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Jung, H., et al.: Housecat6d \u2013 a large-scale multi-modal category level 6D object perception dataset with household objects in realistic scenarios (2023)","DOI":"10.1109\/CVPR52733.2024.02123"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4015\u20134026 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Lin, Z.H., Huang, S.Y., Wang, Y.C.F.: Convolution in the cloud: Learning deformable kernels in 3D graph convolution networks for point cloud analysis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00187"},{"key":"12_CR15","doi-asserted-by":"publisher","unstructured":"Liu, J., Sun, W., Liu, C., Zhang, X., Fan, S., Wu, W.: HFF6D: hierarchical feature fusion network for robust 6d object pose tracking. IEEE Trans. Circuits Syst. Video Technol. 1\u201313 (2022). https:\/\/doi.org\/10.1109\/TCSVT.2022.3181597","DOI":"10.1109\/TCSVT.2022.3181597"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Liu, J., Chen, Y., Ye, X., Qi, X.: Prior-free category-level pose estimation with implicit space transformation. arXiv preprint arXiv:2303.13479 (2023)","DOI":"10.1109\/ICCV51070.2023.01285"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Liu, X., Iwase, S., Kitani, K.M.: Stereobj-1m: large-scale stereo image dataset for 6D object pose estimation. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01069"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Liu, X., Wang, G., Li, Y., Ji, X.: CATRE: iterative point clouds alignment for category-level object pose refinement. In: European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-20086-1_29"},{"issue":"12","key":"12_CR19","doi-asserted-by":"publisher","first-page":"2633","DOI":"10.1109\/TVCG.2015.2513408","volume":"22","author":"E Marchand","year":"2016","unstructured":"Marchand, E., Uchiyama, H., Spindler, F.: Pose estimation for augmented reality: a hands-on survey. IEEE Trans. Visual Comput. Graphics 22(12), 2633\u20132651 (2016). https:\/\/doi.org\/10.1109\/TVCG.2015.2513408","journal-title":"IEEE Trans. Visual Comput. Graphics"},{"key":"12_CR20","unstructured":"Oquab, M., et al.: Dinov2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"issue":"1","key":"12_CR21","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1109\/LRA.2021.3119379","volume":"7","author":"NA Piga","year":"2022","unstructured":"Piga, N.A., Onyshchuk, Y., Pasquale, G., Pattacini, U., Natale, L.: ROFT: real-time optical flow-aided 6d object pose and velocity tracking. IEEE Robot. Autom. Lett. 7(1), 159\u2013166 (2022). https:\/\/doi.org\/10.1109\/LRA.2021.3119379","journal-title":"IEEE Robot. Autom. Lett."},{"key":"12_CR22","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: Pointnet++: deep hierarchical feature learning on point sets in a metric space. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"12_CR23","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"12_CR24","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Tian, Y., et al.: Robokeygen: robot pose and joint angles estimation via diffusion-based 3D keypoint generation. arXiv preprint arXiv:2403.18259 (2024)","DOI":"10.1109\/ICRA57147.2024.10611423"},{"issue":"7","key":"12_CR26","doi-asserted-by":"publisher","first-page":"1661","DOI":"10.1162\/NECO_a_00142","volume":"23","author":"P Vincent","year":"2011","unstructured":"Vincent, P.: A connection between score matching and denoising autoencoders. Neural Comput. 23(7), 1661\u20131674 (2011)","journal-title":"Neural Comput."},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Wang, C., et al.: Densefusion: 6D object pose estimation by iterative dense fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3343\u20133352 (2019)","DOI":"10.1109\/CVPR.2019.00346"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Wang, H., Sridhar, S., Huang, J., Valentin, J., Song, S., Guibas, L.J.: Normalized object coordinate space for category-level 6D object pose and size estimation. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00275"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, K., Dou, Q.: Category-level 6D object pose estimation via cascaded relation and recurrent reconstruction networks. In: 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4807\u20134814. IEEE (2021)","DOI":"10.1109\/IROS51168.2021.9636212"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Wang, P., et al.: Phocal: a multi-modal dataset for category-level object pose estimation with photometrically challenging objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21222\u201321231 (2022)","DOI":"10.1109\/CVPR52688.2022.02054"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Wen, B., Bekris, K.E.: Bundletrack: 6D pose tracking for novel objects without instance or category-level 3D models. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (2021)","DOI":"10.1109\/IROS51168.2021.9635991"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Weng, Y., et al.: Captra: category-level pose tracking for rigid and articulated objects from point clouds. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 13209\u201313218 (2021)","DOI":"10.1109\/ICCV48922.2021.01296"},{"key":"12_CR33","first-page":"31986","volume":"35","author":"M Wu","year":"2022","unstructured":"Wu, M., Zhong, F., Xia, Y., Dong, H.: TarGF: learning target gradient field to rearrange objects without explicit goal specification. Adv. Neural. Inf. Process. Syst. 35, 31986\u201331999 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"12_CR34","unstructured":"Wu, T., Wu, M., Zhang, J., Gan, Y., Dong, H.: Learning score-based grasping primitive for human-assisting dexterous grasping. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Wu, T., et al.: Omniobject3d: large-vocabulary 3D object dataset for realistic perception, reconstruction and generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 803\u2013814 (2023)","DOI":"10.1109\/CVPR52729.2023.00084"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Schmidt, T., Narayanan, V., Fox, D.: Posecnn: a convolutional neural network for 6D object pose estimation in cluttered scenes (2018)","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Xue, T., Wu, M., Lu, L., Wang, H., Dong, H., Chen, B.: Learning gradient fields for scalable and generalizable irregular packing. In: SIGGRAPH Asia 2023 Conference Papers, pp. 1\u201311 (2023)","DOI":"10.1145\/3610548.3618235"},{"key":"12_CR38","doi-asserted-by":"crossref","unstructured":"Yeshwanth, C., Liu, Y.C., Nie\u00dfner, M., Dai, A.: Scannet++: a high-fidelity dataset of 3D indoor scenes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12\u201322 (2023)","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"12_CR39","doi-asserted-by":"crossref","unstructured":"You, Y., He, W., Liu, J., Xiong, H., Wang, W., Lu, C.: CPPF++: uncertainty-aware sim2real object pose estimation by vote aggregation. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3419038"},{"key":"12_CR40","doi-asserted-by":"crossref","unstructured":"You, Y., Shi, R., Wang, W., Lu, C.: CPPF: towards robust category-level 9D pose estimation in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6866\u20136875 (2022)","DOI":"10.1109\/CVPR52688.2022.00674"},{"key":"12_CR41","unstructured":"You, Y., et al.: Pace: pose annotations in cluttered environments. arXiv preprint arXiv:2312.15130 (2023)"},{"key":"12_CR42","unstructured":"Zeng, Y., et al.: Distilling functional rearrangement priors from large models. arXiv preprint arXiv:2312.01474 (2023)"},{"key":"12_CR43","unstructured":"Zhang, J., Wu, M., Dong, H.: Generative category-level object pose estimation via diffusion models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"12_CR44","doi-asserted-by":"crossref","unstructured":"Zheng, L., et al.: HS-pose: hybrid scope feature extraction for category-level object pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 17163\u201317173 (2023)","DOI":"10.1109\/CVPR52729.2023.01646"},{"key":"12_CR45","doi-asserted-by":"crossref","unstructured":"Zhou, L., Liu, Z., Gan, R., Wang, H., Ang, M.H.: DR-pose: a two-stage deformation-and-registration pipeline for category-level 6D object pose estimation (2023)","DOI":"10.1109\/IROS55552.2023.10341552"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73226-3_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:15:12Z","timestamp":1730387712000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73226-3_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9783031732256","9783031732263"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73226-3_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}