{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T13:51:49Z","timestamp":1742997109773,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819785070"},{"type":"electronic","value":"9789819785087"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8508-7_26","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T06:08:38Z","timestamp":1730527718000},"page":"371-386","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ESD-Pose: Enhanced Semantic Discrimination for\u00a0Generalizable 6D Pose Estimation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1651-0435","authenticated-orcid":false,"given":"Xingyuan","family":"Deng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4202-4476","authenticated-orcid":false,"given":"Kangru","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6655-188X","authenticated-orcid":false,"given":"Lei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1579-3942","authenticated-orcid":false,"given":"Dongchen","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7478-4544","authenticated-orcid":false,"given":"Jiamao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"Collet, A., Berenson, D., Srinivasa, S.S., Ferguson, D.: Object recognition and full pose registration from a single image for robotic manipulation. In: 2009 IEEE International Conference on Robotics and Automation, pp. 48\u201355. IEEE (2009)","DOI":"10.1109\/ROBOT.2009.5152739"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Burdea, G.C., Coiffet, P.: Virtual Reality Technology. Wiley (2003)","DOI":"10.1162\/105474603322955950"},{"issue":"12","key":"26_CR3","doi-asserted-by":"publisher","first-page":"2633","DOI":"10.1109\/TVCG.2015.2513408","volume":"22","author":"E Marchand","year":"2015","unstructured":"Marchand, E., Uchiyama, H., Spindler, F.: Pose estimation for augmented reality: a hands-on survey. IEEE Trans. Visual Comput. Graphics 22(12), 2633\u20132651 (2015)","journal-title":"IEEE Trans. Visual Comput. Graphics"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Peng, S., Liu, Y., Huang, Q., Zhou, X., Bao, H.: PVNet: pixel-wise voting network for 6DoF pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4561\u20144570 (2019)","DOI":"10.1109\/CVPR.2019.00469"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Zakharov, S., Shugurov, I., Ilic, S.: DPOD: 6D pose object detector and refiner. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1941\u20131950 (2019)","DOI":"10.1109\/ICCV.2019.00203"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"He, Y., Sun, W., Huang, H., Liu, J., Fan, H., Sun, J.: PVN3D: a deep point-wise 3D keypoints voting network for 6dof pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11632\u201311641 (2020)","DOI":"10.1109\/CVPR42600.2020.01165"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"He, Y., Huang, H., Fan, H., Chen, Q., Sun, J.:FFB6D: a full flow bidirectional fusion network for 6D pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3003\u20143013 (2021)","DOI":"10.1109\/CVPR46437.2021.00302"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Schmidt, T., Narayanan, V., Fox, D.: PoseCNN: a convolutional neural network for 6D object pose estimation in cluttered scenes (2017) arXiv preprint arXiv:1711.00199","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Kehl, W., Manhardt, F., Tombari, F., Ilic, S., Navab, N.: SSD-6D: making RGB-based 3D detection and 6D pose estimation great again. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1521\u20141529 (2017)","DOI":"10.1109\/ICCV.2017.169"},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Li, Z., Wang, G., Ji, Z.: CDPN: coordinates-based disentangled pose network for real-time RGB-based 6-DoF object pose estimation. In: CVF International Conference on Computer Vision (ICCV), pp. 7677\u20147686. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00777"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Wang, C., Xu, D., Zhu, Y., Mart\u00edn-Mart\u00edn, R., Lu, C., Fei-Fei, L., Savarese, S.: Densefusion: 6D object pose estimation by iterative dense fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3343\u20133352 (2019)","DOI":"10.1109\/CVPR.2019.00346"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Wang, H., Sridhar, S., Huang, J., Valentin, J., Song, S., Guibas, L.J.: Normalized object coordinate space for category-level 6D object pose and size estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2642\u20132651 (2019)","DOI":"10.1109\/CVPR.2019.00275"},{"key":"26_CR13","doi-asserted-by":"crossref","unstructured":"Tian, M., Ang, M.H., Lee, G.H.: Shape prior deformation for categorical 6D object pose and size estimation. In: Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXI 16, vol. 2020, pp. 530\u2014546. Springer (2020)","DOI":"10.1007\/978-3-030-58589-1_32"},{"key":"26_CR14","doi-asserted-by":"crossref","unstructured":"Chen, K., Dou, Q.: SGPA: structure-guided prior adaptation for category-level 6D object pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2773\u20142782 (2021)","DOI":"10.1109\/ICCV48922.2021.00277"},{"key":"26_CR15","doi-asserted-by":"crossref","unstructured":"Li, G., Zhu, D., Zhang, G., Shi, W., Zhang, T., Zhang, X., Li, J.: SD-pose: structural discrepancy aware category-level 6D object pose estimation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5685\u20135694 (2023)","DOI":"10.1109\/WACV56688.2023.00564"},{"issue":"1","key":"26_CR16","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"26_CR17","doi-asserted-by":"crossref","unstructured":"Park, K., Mousavian, A., Xiang, Y., Fox, D.: Latentfusion: end-to-end differentiable reconstruction and rendering for unseen object pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10710\u201310719 (2020)","DOI":"10.1109\/CVPR42600.2020.01072"},{"key":"26_CR18","doi-asserted-by":"crossref","unstructured":"Shugurov, I., Li, F., Busam, B., Ilic, S.: OSOP: a multi-stage one shot object pose estimation framework. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6835\u20136844 (2022)","DOI":"10.1109\/CVPR52688.2022.00671"},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"He, Y., Wang, Y., Fan, H., Sun, J., Chen, Q.: FS6D: Few-shot 6D pose estimation of novel objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6814\u20146824 (2022)","DOI":"10.1109\/CVPR52688.2022.00669"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Y., Wen, Y., Peng, S., Lin, C., Long, X., Komura, T., Wang, W.: Gen6D: generalizable model-free 6-DoF object pose estimation from RGB images. In: European Conference on Computer Vision, pp. 298\u2013315. Springer (2022)","DOI":"10.1007\/978-3-031-19824-3_18"},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"Sun, J., Wang, Z., Zhang, S., He, X., Zhao, H., Zhang, G., Zhou, X.: Onepose: one-shot object pose estimation without cad models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6825\u20136834 (2022)","DOI":"10.1109\/CVPR52688.2022.00670"},{"key":"26_CR22","unstructured":"He, X., Sun, J., Wang, Y., Huang, D., Bao, H., Zhou, X.: Onepose++: keypoint-free one-shot object pose estimation without cad models. In: Advances in Neural Information Processing Systems, vol.\u00a035, pp. 35103\u201335115 (2022)"},{"key":"26_CR23","unstructured":"Chang, A.X., Funkhouser, T., Guibas, L., Hanrahan, P., Huang, Q., Li, Z., Savarese, S., Savva, M., Song, S., Su, H., et\u00a0al.: Shapenet: an information-rich 3D model repository (2015). arXiv preprint arXiv:1512.03012"},{"key":"26_CR24","doi-asserted-by":"crossref","unstructured":"Wang, Q., Wang, Z., Genova, K., Srinivasan, P.P., Zhou, H., Barron, J.T., Martin-Brualla, R., Snavely, N., Funkhouser, T.: IBRNet: learning multi-view image-based rendering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4690\u20144699 (2021)","DOI":"10.1109\/CVPR46437.2021.00466"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Hinterstoisser, S., Lepetit, V., Ilic, S., Holzer, S., Bradski, G., Konolige, K., Navab, N.: Model based training, detection and pose estimation of texture-less 3D objects in heavily cluttered scenes. In: Computer Vision\u2013ACCV 2012: 11th Asian Conference on Computer Vision, Daejeon, Korea, November 5\u20139, 2012, Revised Selected Papers, Part I 11, pp. 548\u2013562. Springer (2013)","DOI":"10.1007\/978-3-642-37331-2_42"},{"key":"26_CR26","doi-asserted-by":"crossref","unstructured":"Wohlhart, P., Lepetit, V.: Learning descriptors for object recognition and 3d pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3109\u20133118 (2015)","DOI":"10.1109\/CVPR.2015.7298930"},{"key":"26_CR27","doi-asserted-by":"crossref","unstructured":"Hinterstoisser, S., Holzer, S., Cagniart, C., Ilic, S., Konolige, K., Navab, N., Lepetit, V.: Multimodal templates for real-time detection of texture-less objects in heavily cluttered scenes. In: International Conference on Computer Vision, vol. 2011, pp. 858\u2013865. IEEE (2011)","DOI":"10.1109\/ICCV.2011.6126326"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8508-7_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T06:17:04Z","timestamp":1730528224000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8508-7_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9789819785070","9789819785087"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8508-7_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}