{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T19:10:07Z","timestamp":1745608207664,"version":"3.40.4"},"publisher-location":"Singapore","reference-count":51,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819658114","type":"print"},{"value":"9789819658121","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-5812-1_19","type":"book-chapter","created":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T18:44:05Z","timestamp":1745606645000},"page":"367-387","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["CosCAD: Cross-Modal CAD Model Retrieval and\u00a0Pose Alignment from\u00a0a\u00a0Single Image"],"prefix":"10.1007","author":[{"given":"Zhikun","family":"Wen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Honghua","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhe","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zeyong","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liangliang","family":"Nan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingqiang","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,4,26]]},"reference":[{"key":"19_CR1","doi-asserted-by":"publisher","unstructured":"Avetisyan, A., Dahnert, M., Dai, A., Savva, M., Chang, A.X., Nie\u00dfner, M.: Scan2cad: learning CAD model alignment in RGB-D scans. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2019, Long Beach, CA, USA, 16\u201320 June 2019, pp. 2614\u20132623. Computer Vision Foundation\/IEEE (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00272","DOI":"10.1109\/CVPR.2019.00272"},{"key":"19_CR2","doi-asserted-by":"publisher","unstructured":"Avetisyan, A., Dai, A., Nie\u00dfner, M.: End-to-end CAD model retrieval and 9dof alignment in 3D scans. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), 27 October\u20132 November 2019, pp. 2551\u20132560. IEEE (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00264","DOI":"10.1109\/ICCV.2019.00264"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Binford, T.: Survey of model-based image analysis systems. Int. J. Robot. Res. (1982)","DOI":"10.1177\/027836498200100103"},{"key":"19_CR4","doi-asserted-by":"publisher","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J. (eds.) ECCV 2020, Part I. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"19_CR5","unstructured":"Chang, A.X., et al.: Shapenet: an information-rich 3D model repository. CoRR abs\/1512.03012 (2015). http:\/\/arxiv.org\/abs\/1512.03012"},{"issue":"11","key":"19_CR6","doi-asserted-by":"publisher","first-page":"3255","DOI":"10.1109\/TVCG.2019.2920817","volume":"26","author":"H Chen","year":"2019","unstructured":"Chen, H., Wei, M., Sun, Y., Xie, X., Wang, J.: Multi-patch collaborative point cloud denoising via low-rank recovery with graph constraint. IEEE Trans. Vis. Comput. Graph. 26(11), 3255\u20133270 (2019)","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"issue":"3","key":"19_CR7","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1007\/s11263-021-01564-7","volume":"130","author":"H Chen","year":"2022","unstructured":"Chen, H., Wei, Z., Li, X., Xu, Y., Wei, M., Wang, J.: RePCD-net: feature-aware recurrent point cloud denoising network. Int. J. Comput. Vision 130(3), 615\u2013629 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Chen, H., Wei, Z., Xu, Y., Wei, M., Wang, J.: Imlovenet: misaligned image-supported registration network for low-overlap point cloud pairs. In: ACM SIGGRAPH 2022 Conference Proceedings, pp.\u00a01\u20139 (2022)","DOI":"10.1145\/3528233.3530744"},{"key":"19_CR9","doi-asserted-by":"publisher","unstructured":"Chen, Y., et al.: UNITER: universal image-text representation learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J. (eds.) ECCV 2020, Part XXX. LNCS, vol. 12375, pp. 104\u2013120. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_7","DOI":"10.1007\/978-3-030-58577-8_7"},{"issue":"1","key":"19_CR10","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1145\/6462.6464","volume":"18","author":"RT Chin","year":"1986","unstructured":"Chin, R.T., Dyer, C.R.: Model-based recognition in robot vision. ACM Comput. Surv. 18(1), 67\u2013108 (1986). https:\/\/doi.org\/10.1145\/6462.6464","journal-title":"ACM Comput. Surv."},{"key":"19_CR11","doi-asserted-by":"publisher","unstructured":"Choy, C.B., Xu, D., Gwak, J., Chen, K., Savarese, S.: 3D-R2N2: a unified approach for single and multi-view 3D object reconstruction. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part VIII. LNCS, vol.\u00a09912, pp. 628\u2013644. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_38","DOI":"10.1007\/978-3-319-46484-8_38"},{"key":"19_CR12","doi-asserted-by":"publisher","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T.A., Nie\u00dfner, M.: Scannet: richly-annotated 3D reconstructions of indoor scenes. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, Honolulu, HI, USA, 21\u201326 July 2017, pp. 2432\u20132443. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.261","DOI":"10.1109\/CVPR.2017.261"},{"key":"19_CR13","doi-asserted-by":"publisher","unstructured":"Duan, K., Bai, S., Xie, L., Qi, H., Huang, Q., Tian, Q.: Centernet: keypoint triplets for object detection. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), 27 October\u20132 November 2019, pp. 6568\u20136577. IEEE (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00667","DOI":"10.1109\/ICCV.2019.00667"},{"key":"19_CR14","doi-asserted-by":"publisher","unstructured":"Engelmann, F., Rematas, K., Leibe, B., Ferrari, V.: From points to multi-object 3D reconstruction. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, virtual, 19\u201325 June 2021, pp. 4588\u20134597. Computer Vision Foundation\/IEEE (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00456","DOI":"10.1109\/CVPR46437.2021.00456"},{"key":"19_CR15","doi-asserted-by":"publisher","unstructured":"Fan, H., Su, H., Guibas, L.J.: A point set generation network for 3D object reconstruction from a single image. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, Honolulu, HI, USA, 21\u201326 July 2017, pp. 2463\u20132471. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.264","DOI":"10.1109\/CVPR.2017.264"},{"issue":"12","key":"19_CR16","doi-asserted-by":"publisher","first-page":"3313","DOI":"10.1007\/S11263-021-01534-Z","volume":"129","author":"H Fu","year":"2021","unstructured":"Fu, H., et al.: 3D-future: 3D furniture shape with texture. Int. J. Comput. Vis. 129(12), 3313\u20133337 (2021). https:\/\/doi.org\/10.1007\/S11263-021-01534-Z","journal-title":"Int. J. Comput. Vis."},{"key":"19_CR17","unstructured":"Gionis, A., Indyk, P., Motwani, R., et\u00a0al.: Similarity search in high dimensions via hashing. In: VLDB, vol.\u00a099, pp. 518\u2013529 (1999)"},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Malik, J., Johnson, J.: Mesh R-CNN. CoRR abs\/1906.02739 (2019). http:\/\/arxiv.org\/abs\/1906.02739","DOI":"10.1109\/ICCV.2019.00988"},{"key":"19_CR19","doi-asserted-by":"publisher","unstructured":"Groueix, T., Fisher, M., Kim, V.G., Russell, B.C., Aubry, M.: A papier-m\u00e2ch\u00e9 approach to learning 3D surface generation. In: 2018 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2018, Salt Lake City, UT, USA, 18\u201322 June 2018, pp. 216\u2013224. Computer Vision Foundation\/IEEE Computer Society (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00030","DOI":"10.1109\/CVPR.2018.00030"},{"key":"19_CR20","doi-asserted-by":"publisher","unstructured":"G\u00fcmeli, C., Dai, A., Nie\u00dfner, M.: ROCA: robust CAD model retrieval and alignment from a single image. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, 18\u201324 June 2022, pp. 4012\u20134021. IEEE (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.00399","DOI":"10.1109\/CVPR52688.2022.00399"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"19_CR22","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, 27\u201330 June 2016, pp. 770\u2013778. IEEE Computer Society (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"issue":"1","key":"19_CR23","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/TPAMI.2010.57","volume":"33","author":"H Jegou","year":"2010","unstructured":"Jegou, H., Douze, M., Schmid, C.: Product quantization for nearest neighbor search. IEEE Trans. Pattern Anal. Mach. Intell. 33(1), 117\u2013128 (2010)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"19_CR24","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: Ultralytics YOLO (2023). https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"19_CR25","doi-asserted-by":"publisher","unstructured":"Kuo, W., Angelova, A., Lin, T., Dai, A.: Mask2cad: 3D shape prediction by learning to segment and retrieve. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J. (eds.) ECCV 2020, Part III. LNCS, vol. 12348, pp. 260\u2013277. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58580-8_16","DOI":"10.1007\/978-3-030-58580-8_16"},{"key":"19_CR26","doi-asserted-by":"publisher","unstructured":"Kuo, W., Angelova, A., Malik, J., Lin, T.: Shapemask: learning to segment novel objects by refining shape priors. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), 27 October\u20132 November 2019, pp. 9206\u20139215. IEEE (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00930","DOI":"10.1109\/ICCV.2019.00930"},{"key":"19_CR27","unstructured":"Langer, F., Bae, G., Budvytis, I., Cipolla, R.: SPARC: sparse render-and-compare for CAD model alignment in a single RGB image. In: 33rd British Machine Vision Conference 2022, BMVC 2022, London, UK, 21\u201324 November 2022, p.\u00a072. BMVA Press (2022). https:\/\/bmvc2022.mpi-inf.mpg.de\/72\/"},{"key":"19_CR28","unstructured":"Li, J., Selvaraju, R.R., Gotmare, A., Joty, S.R., Xiong, C., Hoi, S.C.: Align before fuse: vision and language representation learning with momentum distillation. In: Ranzato, M., Beygelzimer, A., Dauphin, Y.N., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, 6\u201314 December 2021, Virtual, pp. 9694\u20139705 (2021)"},{"key":"19_CR29","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C., Chang, K.: VisualBERT: a simple and performant baseline for vision and language. CoRR abs\/1908.03557 (2019). http:\/\/arxiv.org\/abs\/1908.03557"},{"key":"19_CR30","doi-asserted-by":"publisher","unstructured":"Li, L.H., et al.: Grounded language-image pre-training. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, 18\u201324 June 2022, pp. 10955\u201310965. IEEE (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01069","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"19_CR31","doi-asserted-by":"publisher","unstructured":"Li, X., et al.: Oscar: object-semantics aligned pre-training for vision-language tasks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J. (eds.) ECCV 2020, Part XXX. LNCS, vol. 12375, pp. 121\u2013137. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_8","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"19_CR32","doi-asserted-by":"publisher","unstructured":"Lin, T., Doll\u00e1r, P., Girshick, R.B., He, K., Hariharan, B., Belongie, S.J.: Feature pyramid networks for object detection. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, Honolulu, HI, USA, 21\u201326 July 2017, pp. 936\u2013944. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.106","DOI":"10.1109\/CVPR.2017.106"},{"key":"19_CR33","doi-asserted-by":"publisher","unstructured":"Lin, T., Goyal, P., Girshick, R.B., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, 22\u201329 October 2017, pp. 2999\u20133007. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.324","DOI":"10.1109\/ICCV.2017.324"},{"key":"19_CR34","unstructured":"Liu, F., Liu, X.: Voxel-based 3D detection and reconstruction of multiple objects from a single image. In: Ranzato, M., Beygelzimer, A., Dauphin, Y.N., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, 6\u201314 December 2021, Virtual, pp. 2413\u20132426 (2021)"},{"key":"19_CR35","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: VilBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Wallach, H.M., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E.B., Garnett, R. (eds.) Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, 8\u201314 December 2019, Vancouver, BC, Canada, pp. 13\u201323 (2019)"},{"issue":"4","key":"19_CR36","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2018.2889473","volume":"42","author":"YA Malkov","year":"2018","unstructured":"Malkov, Y.A., Yashunin, D.A.: Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE Trans. Pattern Anal. Mach. Intell. 42(4), 824\u2013836 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"19_CR37","doi-asserted-by":"publisher","first-page":"1320","DOI":"10.1109\/TPAMI.2022.3146082","volume":"45","author":"K Maninis","year":"2023","unstructured":"Maninis, K., Popov, S., Nie\u00dfner, M., Ferrari, V.: Vid2cad: CAD model alignment using multi-view constraints from videos. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 1320\u20131327 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2022.3146082","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"19_CR38","doi-asserted-by":"publisher","unstructured":"Mu, N., Kirillov, A., Wagner, D.A., Xie, S.: SLIP: self-supervision meets language-image pre-training. In: Avidan, S., Brostow, G.J., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XXVI. LNCS, vol. 13686, pp. 529\u2013544. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19809-0_30","DOI":"10.1007\/978-3-031-19809-0_30"},{"key":"19_CR39","doi-asserted-by":"publisher","unstructured":"Nie, Y., Han, X., Guo, S., Zheng, Y., Chang, J., Zhang, J.: Total3dunderstanding: joint layout, object pose and mesh reconstruction for indoor scenes from a single image. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, 13\u201319 June 2020, pp. 52\u201361. Computer Vision Foundation\/IEEE (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00013","DOI":"10.1109\/CVPR42600.2020.00013"},{"key":"19_CR40","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18\u201324 July 2021, Virtual Event. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 8748\u20138763. PMLR (2021)"},{"key":"19_CR41","unstructured":"Roberts, L.G.: Machine Perception of Three-Dimensional Solids. Outstanding Dissertations in the Computer Sciences, Garland Publishing, New York (1963)"},{"issue":"3","key":"19_CR42","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/S11263-015-0816-Y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vis. 115(3), 211\u2013252 (2015). https:\/\/doi.org\/10.1007\/S11263-015-0816-Y","journal-title":"Int. J. Comput. Vis."},{"key":"19_CR43","doi-asserted-by":"publisher","unstructured":"Tan, H., Bansal, M.: LXMERT: learning cross-modality encoder representations from transformers. In: Inui, K., Jiang, J., Ng, V., Wan, X. (eds.) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, EMNLP-IJCNLP 2019, Hong Kong, China, 3\u20137 November 2019, pp. 5099\u20135110. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/V1\/D19-1514","DOI":"10.18653\/V1\/D19-1514"},{"key":"19_CR44","doi-asserted-by":"publisher","unstructured":"Tan, M., Pang, R., Le, Q.V.: Efficientdet: scalable and efficient object detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, 13\u201319 June 2020, pp. 10778\u201310787. Computer Vision Foundation \/ IEEE (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01079","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"19_CR45","doi-asserted-by":"publisher","unstructured":"Tatarchenko, M., Dosovitskiy, A., Brox, T.: Octree generating networks: efficient convolutional architectures for high-resolution 3D outputs. In: IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, 22\u201329 October 2017, pp. 2107\u20132115. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.230","DOI":"10.1109\/ICCV.2017.230"},{"key":"19_CR46","doi-asserted-by":"publisher","unstructured":"Wang, N., Zhang, Y., Li, Z., Fu, Y., Liu, W., Jiang, Y.: Pixel2mesh: generating 3D mesh models from single RGB images. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018, Part XI. LNCS, vol. 11215, pp. 55\u201371. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01252-6_4","DOI":"10.1007\/978-3-030-01252-6_4"},{"key":"19_CR47","doi-asserted-by":"publisher","unstructured":"Wu, Z., et al.: 3D shapenets: a deep representation for volumetric shapes. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2015, Boston, MA, USA, 7\u201312 June 2015, pp. 1912\u20131920. IEEE Computer Society (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298801","DOI":"10.1109\/CVPR.2015.7298801"},{"key":"19_CR48","doi-asserted-by":"publisher","unstructured":"Zhang, C., Cui, Z., Zhang, Y., Zeng, B., Pollefeys, M., Liu, S.: Holistic 3D scene understanding from a single image with implicit representation. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, virtual, 19\u201325 June 2021, pp. 8833\u20138842. Computer Vision Foundation\/IEEE (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00872","DOI":"10.1109\/CVPR46437.2021.00872"},{"key":"19_CR49","doi-asserted-by":"publisher","unstructured":"Zhang, R., et al.: Pointclip: point cloud understanding by CLIP. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, 18\u201324 June 2022, pp. 8542\u20138552. IEEE (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.00836","DOI":"10.1109\/CVPR52688.2022.00836"},{"key":"19_CR50","doi-asserted-by":"publisher","unstructured":"Zheng, L., et al.: HS-pose: hybrid scope feature extraction for category-level object pose estimation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, 17\u201324 June 2023, pp. 17163\u201317173. IEEE (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01646","DOI":"10.1109\/CVPR52729.2023.01646"},{"key":"19_CR51","doi-asserted-by":"crossref","unstructured":"Zhou, H., et al.: Geometry and learning co-supported normal estimation for unstructured point cloud. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13238\u201313247 (2020)","DOI":"10.1109\/CVPR42600.2020.01325"}],"container-title":["Lecture Notes in Computer Science","Computational Visual Media"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-5812-1_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T18:44:16Z","timestamp":1745606656000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-5812-1_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819658114","9789819658121"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-5812-1_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"26 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CVM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Visual Media","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hong Kong SAR","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cvm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iccvm.org\/2025\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}