{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T19:10:08Z","timestamp":1745608208476,"version":"3.40.4"},"publisher-location":"Singapore","reference-count":85,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819658114","type":"print"},{"value":"9789819658121","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-5812-1_16","type":"book-chapter","created":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T18:45:11Z","timestamp":1745606711000},"page":"294-315","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Extreme Two-View Geometry From Object Poses with\u00a0Diffusion Models"],"prefix":"10.1007","author":[{"given":"Yujing","family":"Sun","sequence":"first","affiliation":[]},{"given":"Caiyi","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yuan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yuexin","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Siu Ming","family":"Yiu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,26]]},"reference":[{"issue":"9","key":"16_CR1","doi-asserted-by":"publisher","first-page":"2947","DOI":"10.1109\/TCSVT.2020.2973068","volume":"30","author":"S Abdulwahab","year":"2020","unstructured":"Abdulwahab, S., Rashwan, H.A., Garcia, M.A., Jabreel, M., Chambon, S., Puig, D.: Adversarial learning for depth and viewpoint estimation from a single image. IEEE Trans. Circuits Syst. Video Technol. 30(9), 2947\u20132958 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Arnold, E., et al.: Map-free visual relocalization: metric pose relative to a single image. In: European Conference on Computer Vision, pp. 690\u2013708. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-19769-7_40"},{"key":"16_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1007\/11744023_32","volume-title":"Computer Vision \u2013 ECCV 2006","author":"H Bay","year":"2006","unstructured":"Bay, H., Tuytelaars, T., Van Gool, L.: SURF: speeded up robust features. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) ECCV 2006. LNCS, vol. 3951, pp. 404\u2013417. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11744023_32"},{"key":"16_CR4","unstructured":"Bhat, S.F., Birkl, R., Wofk, D., Wonka, P., M\u00fcller, M.: Zoedepth: zero-shot transfer by combining relative and metric depth. arXiv preprint arXiv:2302.12288 (2023)"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"Cai, D., Heikkil\u00e4, J., Rahtu, E.: Ove6d: object viewpoint encoding for depth-based 6d object pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6803\u20136813 (2022)","DOI":"10.1109\/CVPR52688.2022.00668"},{"issue":"6","key":"16_CR6","doi-asserted-by":"publisher","first-page":"1874","DOI":"10.1109\/TRO.2021.3075644","volume":"37","author":"C Campos","year":"2021","unstructured":"Campos, C., Elvira, R., Rodr\u00edguez, J., Montiel, J.M., Tard\u00f3s, J.D.: ORB-SLAM3: an accurate open-source library for visual, visual-inertial, and multimap slam. IEEE Trans. Rob. 37(6), 1874\u20131890 (2021)","journal-title":"IEEE Trans. Rob."},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Castro, P., Kim, T.-K.: Posematcher: one-shot 6d object pose estimation by deep feature matching. In: 2023 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW), pp. 2140\u20132149 (2023)","DOI":"10.1109\/ICCVW60793.2023.00229"},{"key":"16_CR8","unstructured":"Chang, A.X., et\u00a0al.: Shapenet: an information-rich 3D model repository. arXiv preprint arXiv:1512.03012 (2015)"},{"key":"16_CR9","unstructured":"Cheng, W., Cao, Y.-P., Shan, Y.: Id-pose: sparse-view camera pose estimation by inverting diffusion models. arXiv preprint arXiv:2306.17140 (2023)"},{"key":"16_CR10","unstructured":"Choy, C.B., Gwak, J., Savarese, S., Chandraker, M.: Universal correspondence network. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"16_CR11","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1007\/978-981-97-2092-7_8","volume-title":"Computational Visual Media","author":"X Cui","year":"2024","unstructured":"Cui, X., Li, N., Zhang, C., Zhang, Q., Feng, W., Wan, L.: Silhouette-based 6d object pose estimation. In: Zhang, F.-L., Sharf, A. (eds.) Computational Visual Media, pp. 157\u2013179. Springer, Singapore (2024)"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Dani, M., Narain, K., Hebbalaguppe, R.: 3Dposelite: a compact 3D pose estimation using node embeddings. In: 2021 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1877\u20131886 (2021)","DOI":"10.1109\/WACV48630.2021.00192"},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Deitke, M., et al.: Objaverse: a universe of annotated 3D objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13142\u201313153 (2023)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Downs, L., et al.: Google scanned objects: a high-quality dataset of 3D scanned household items. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 2553\u20132560. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811809"},{"issue":"2","key":"16_CR15","doi-asserted-by":"publisher","first-page":"1098","DOI":"10.1109\/TCSVT.2023.3290617","volume":"34","author":"G Feng","year":"2024","unstructured":"Feng, G., Xu, T.-B., Liu, F., Liu, M., Wei, Z.: NVR-net: normal vector guided regression network for disentangled 6D pose estimation. IEEE Trans. Circuits Syst. Video Technol. 34(2), 1098\u20131113 (2024)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"7","key":"16_CR16","doi-asserted-by":"publisher","first-page":"3358","DOI":"10.1109\/TCSVT.2022.3233191","volume":"33","author":"D Fu","year":"2023","unstructured":"Fu, D., Han, S., Liang, B., Li, W.: The 6D pose estimation of the aircraft using geometric property. IEEE Trans. Circuits Syst. Video Technol. 33(7), 3358\u20133368 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Grabner, A., et al.: Geometric correspondence fields: learned differentiable rendering for 3D pose refinement in the wild (2020)","DOI":"10.1007\/978-3-030-58517-4_7"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Hartley, R., Zisserman, A.: Multiple view geometry in computer vision. Cambridge University Press (2003)","DOI":"10.1017\/CBO9780511811685"},{"key":"16_CR19","unstructured":"He, X., Sun, J., Wang, Y., Huang, D., Bao, H., Zhou, X.: Onepose++: keypoint-free one-shot object pose estimation without CAD models. In: Advances in Neural Information Processing Systems (2022)"},{"key":"16_CR20","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"Irshad, M.Z., Kollar, T., Laskey, M., Stone, K., Kira, Z.: Centersnap: single-shot multi-object 3D shape reconstruction and categorical 6d pose and size estimation. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 10632\u201310640. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811799"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Irshad, M.Z., Zakharov, S., Ambrus, R., Kollar, T., Kira, Z., Gaidon, A.: Shapo: implicit representations for multi-object shape, appearance, and pose optimization. In: European Conference on Computer Vision, pp. 275\u2013292. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-20086-1_16"},{"key":"16_CR23","unstructured":"Jampani, V., et al.: NAVI: category-agnostic image collections with high-quality 3D shape and pose annotations. In: NeurIPS (2023)"},{"key":"16_CR24","unstructured":"Jiang, H., Jiang, Z., Grauman, K., Zhu, Y.: Few-view object reconstruction with unknown categories and camera poses. arXiv preprint arXiv:2212.04492 (2022)"},{"key":"16_CR25","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M.J., Jacobs, D.W., Malik, J.: End-to-end recovery of human shape and pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7122\u20137131 (2018)","DOI":"10.1109\/CVPR.2018.00744"},{"key":"16_CR26","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Zhang, J.Y., Felsen, P., Malik, J.: Learning 3D human dynamics from video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5614\u20135623 (2019)","DOI":"10.1109\/CVPR.2019.00576"},{"key":"16_CR27","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. arXiv preprint arXiv:2304.02643 (2023)"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Athanasiou, N., Black, M.J.: Vibe: video inference for human body pose and shape estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5253\u20135263 (2020)","DOI":"10.1109\/CVPR42600.2020.00530"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Li, F., et al.: NeRF-pose: a first-reconstruct-then-regress approach for weakly-supervised 6D object pose estimation. In: 2023 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW), pp. 2115\u20132125 (2023)","DOI":"10.1109\/ICCVW60793.2023.00226"},{"key":"16_CR30","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, G., Ji, X., Xiang, Y., Fox, D.: DeepIM: deep iterative matching for 6D pose estimation. In: European Conference Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01231-1_42"},{"key":"16_CR31","doi-asserted-by":"crossref","unstructured":"Lin, A., Zhang, J.Y., Ramanan, D., Tulsiani, S.: Relpose++: recovering 6D poses from sparse-view observations. arXiv preprint arXiv:2305.04926 (2023)","DOI":"10.1109\/3DV62453.2024.00126"},{"issue":"5","key":"16_CR32","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1109\/TPAMI.2010.147","volume":"33","author":"C Liu","year":"2010","unstructured":"Liu, C., Yuen, J., Torralba, A.: Sift flow: dense correspondence across scenes and its applications. IEEE Trans. Pattern Anal. Mach. Intell. 33(5), 978\u2013994 (2010)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"10","key":"16_CR33","doi-asserted-by":"publisher","first-page":"6728","DOI":"10.1109\/TCSVT.2022.3169144","volume":"32","author":"J Liu","year":"2022","unstructured":"Liu, J., Cao, Z., Tang, Y., Liu, X., Tan, M.: Category-level 6D object pose estimation with structure encoder and reasoning attention. IEEE Trans. Circuits Syst. Video Technol. 32(10), 6728\u20136740 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"16_CR34","unstructured":"Liu, M., et\u00a0al.: One-2-3-45: any single image to 3d mesh in 45 seconds without per-shape optimization. arXiv preprint arXiv:2306.16928 (2023)"},{"key":"16_CR35","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3D object. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9298\u20139309 (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"16_CR36","unstructured":"Liu, Y., et al.: Syncdreamer: generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)"},{"key":"16_CR37","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Gen6D: generalizable model-free 6-DoF object pose estimation from RGB images. In: European Conference on Computer Vision, pp. 298\u2013315. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-19824-3_18"},{"key":"16_CR38","doi-asserted-by":"crossref","unstructured":"Long, X., et\u00a0al.: Wonder3D: single image to 3D using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023)","DOI":"10.1109\/CVPR52733.2024.00951"},{"issue":"5828","key":"16_CR39","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1038\/293133a0","volume":"293","author":"HC Longuet-Higgins","year":"1981","unstructured":"Longuet-Higgins, H.C.: A computer algorithm for reconstructing a scene from two projections. Nature 293(5828), 133\u2013135 (1981)","journal-title":"Nature"},{"key":"16_CR40","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vision 60, 91\u2013110 (2004)","journal-title":"Int. J. Comput. Vision"},{"key":"16_CR41","doi-asserted-by":"crossref","unstructured":"Ma, W.-C., Yang, A.J., Wang, S., Urtasun, R., Torralba, A.: Virtual correspondence: humans as a cue for extreme-view geometry. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15924\u201315934 (2022)","DOI":"10.1109\/CVPR52688.2022.01546"},{"issue":"4","key":"16_CR42","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073596","volume":"36","author":"D Mehta","year":"2017","unstructured":"Mehta, D., et al.: VNECT: real-time 3D human pose estimation with a single RGB camera. ACM Trans. Graph. (TOG) 36(4), 1\u201314 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"16_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1007\/978-3-319-70353-4_57","volume-title":"Advanced Concepts for Intelligent Vision Systems","author":"I Melekhov","year":"2017","unstructured":"Melekhov, I., Ylioinas, J., Kannala, J., Rahtu, E.: Relative camera pose estimation using convolutional neural networks. In: Blanc-Talon, J., Penne, R., Philips, W., Popescu, D., Scheunders, P. (eds.) ACIVS 2017. LNCS, vol. 10617, pp. 675\u2013687. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-70353-4_57"},{"issue":"1","key":"16_CR44","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"16_CR45","doi-asserted-by":"crossref","unstructured":"Mou, C., et al.: T2i-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)","DOI":"10.1609\/aaai.v38i5.28226"},{"issue":"5","key":"16_CR46","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1109\/TRO.2015.2463671","volume":"31","author":"R Mur-Artal","year":"2015","unstructured":"Mur-Artal, R., Montiel, J., Tardos, J.D.: ORB-slam: a versatile and accurate monocular slam system. IEEE Trans. Rob. 31(5), 1147\u20131163 (2015)","journal-title":"IEEE Trans. Rob."},{"issue":"5","key":"16_CR47","doi-asserted-by":"publisher","first-page":"1255","DOI":"10.1109\/TRO.2017.2705103","volume":"33","author":"R Mur-Artal","year":"2017","unstructured":"Mur-Artal, R., Tard\u00f3s, J.D.: ORB-slam2: an open-source slam system for monocular, stereo, and RGB-D cameras. IEEE Trans. Rob. 33(5), 1255\u20131262 (2017)","journal-title":"IEEE Trans. Rob."},{"key":"16_CR48","doi-asserted-by":"crossref","unstructured":"Nguyen, V.N., Groueix, T., Hu, Y., Salzmann, M., Lepetit, V.: Nope: novel object pose estimation from a single image. arXiv preprint arXiv:2303.13612 (2023)","DOI":"10.1109\/CVPR52733.2024.01697"},{"key":"16_CR49","doi-asserted-by":"crossref","unstructured":"Nguyen, V.N., Groueix, T., Salzmann, M., Lepetit, V.: Gigapose: fast and robust novel object pose estimation via one correspondence. arXiv preprint arXiv:2311.14155 (2023)","DOI":"10.1109\/CVPR52733.2024.00945"},{"key":"16_CR50","doi-asserted-by":"crossref","unstructured":"Nguyen, V.N., Hu, Y., Xiao, Y., Salzmann, M., Lepetit, V.: Templates for 3D object pose estimation revisited: Generalization to new objects and robustness to occlusions. In: Proceedings IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.00665"},{"issue":"6","key":"16_CR51","doi-asserted-by":"publisher","first-page":"756","DOI":"10.1109\/TPAMI.2004.17","volume":"26","author":"D Nist\u00e9r","year":"2004","unstructured":"Nist\u00e9r, D.: An efficient solution to the five-point relative pose problem. IEEE Trans. Pattern Anal. Mach. Intell. 26(6), 756\u2013770 (2004)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR52","doi-asserted-by":"crossref","unstructured":"Okorn, B., Gu, Q., Hebert, M., Held, D.: Zephyr: zero-shot pose hypothesis rating. In: 2021 IEEE International Conference on Robotics and Automation (ICRA), pp. 14141\u201314148. IEEE (2021)","DOI":"10.1109\/ICRA48506.2021.9560874"},{"issue":"8","key":"16_CR53","doi-asserted-by":"publisher","first-page":"2683","DOI":"10.1109\/TCSVT.2019.2929600","volume":"30","author":"C Papaioannidis","year":"2020","unstructured":"Papaioannidis, C., Pitas, I.: 3D object pose estimation using multi-objective quaternion learning. IEEE Trans. Circuits Syst. Video Technol. 30(8), 2683\u20132693 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"16_CR54","doi-asserted-by":"crossref","unstructured":"Park, K., Mousavian, A., Xiang, Y., Fox, D.: Latentfusion: end-to-end differentiable reconstruction and rendering for unseen object pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.01072"},{"key":"16_CR55","doi-asserted-by":"crossref","unstructured":"Pitteri, G., Ilic, S., Lepetit, V.: Cornet: generic 3D corners for 6D pose estimation of new objects without retraining. In: 2019 IEEE\/CVF International Conference on Computer Vision Workshop (ICCVW), pp. 2807\u20132815 (2019)","DOI":"10.1109\/ICCVW.2019.00342"},{"key":"16_CR56","doi-asserted-by":"crossref","unstructured":"Reizenstein, J., Shapovalov, R., Henzler, P., Sbordone, L., Labatut, P., Novotny, D.: Common objects in 3D: large-scale learning and evaluation of real-life 3D category reconstruction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10901\u201310911 (2021)","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"16_CR57","doi-asserted-by":"crossref","unstructured":"Rockwell, C., Johnson, J., Fouhey, D.F.: The 8-point algorithm as an inductive bias for relative pose prediction by ViTs. In: 2022 International Conference on 3D Vision (3DV), pp. 1\u201311. IEEE (2022)","DOI":"10.1109\/3DV57658.2022.00028"},{"key":"16_CR58","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"16_CR59","doi-asserted-by":"crossref","unstructured":"Sarlin, P.-E., DeTone, D., Malisiewicz, T., Rabinovich, A.: Superglue: learning feature matching with graph neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4938\u20134947 (2020)","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"16_CR60","doi-asserted-by":"crossref","unstructured":"Schonberger, J.L., Frahm, J.-M.: Structure-from-motion revisited. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4104\u20134113 (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"16_CR61","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1007\/978-3-319-46487-9_31","volume-title":"Computer Vision \u2013 ECCV 2016","author":"JL Sch\u00f6nberger","year":"2016","unstructured":"Sch\u00f6nberger, J.L., Zheng, E., Frahm, J.-M., Pollefeys, M.: Pixelwise view selection for unstructured multi-view stereo. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 501\u2013518. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_31"},{"key":"16_CR62","unstructured":"Schuhmann, C., et al.: Laion-5b: an open large-scale dataset for training next generation image-text models. In: Advances in Neural Information Processing Systems, vol. 35, pp. 25278\u201325294 (2022)"},{"key":"16_CR63","unstructured":"Shi, R., et al.: Zero123++: a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110 (2023)"},{"key":"16_CR64","doi-asserted-by":"crossref","unstructured":"Shugurov, I., Li, F., Busam, B., Ilic, S.: Osop: a multi-stage one shot object pose estimation framework. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6825\u20136834 (2022)","DOI":"10.1109\/CVPR52688.2022.00671"},{"key":"16_CR65","doi-asserted-by":"crossref","unstructured":"Sinha, S., Zhang, J.Y., Tagliasacchi, A., Gilitschenski, I., Lindell, D.B.: Sparsepose: sparse-view camera pose regression and refinement. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21349\u201321359 (2023)","DOI":"10.1109\/CVPR52729.2023.02045"},{"key":"16_CR66","doi-asserted-by":"crossref","unstructured":"Sun, J., Shen, Z., Wang, Y., Bao, H., Zhou, X.: LoFTR: detector-free local feature matching with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8922\u20138931 (2021)","DOI":"10.1109\/CVPR46437.2021.00881"},{"key":"16_CR67","doi-asserted-by":"crossref","unstructured":"Sun, J., et al.: Onepose: one-shot object pose estimation without cad models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6825\u20136834 (2022)","DOI":"10.1109\/CVPR52688.2022.00670"},{"key":"16_CR68","unstructured":"Teed, Z., Deng, J.: Droid-slam: Deep visual slam for monocular, stereo, and RGB-D cameras. In: Advances in Neural Information Processing Systems, vol. 34, pp. 16558\u201316569 (2021)"},{"issue":"5","key":"16_CR69","doi-asserted-by":"publisher","first-page":"815","DOI":"10.1109\/TPAMI.2009.77","volume":"32","author":"E Tola","year":"2009","unstructured":"Tola, E., Lepetit, V., Fua, P.: Daisy: an efficient dense descriptor applied to wide-baseline stereo. IEEE Trans. Pattern Anal. Mach. Intell. 32(5), 815\u2013830 (2009)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR70","doi-asserted-by":"crossref","unstructured":"Usman, B., Tagliasacchi, A., Saenko, K., Sud, A.: Metapose: fast 3d pose from multiple views without 3D supervision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6759\u20136770 (2022)","DOI":"10.1109\/CVPR52688.2022.00664"},{"key":"16_CR71","unstructured":"Wang, P., et al.: PF-LRM: pose-free large reconstruction model for joint pose and shape prediction. arXiv preprint arXiv:2311.12024 (2023)"},{"key":"16_CR72","doi-asserted-by":"crossref","unstructured":"Wang, S., Clark, R., Wen, H., Trigoni, N.: Deepvo: towards end-to-end visual odometry with deep recurrent convolutional neural networks. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 2043\u20132050. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989236"},{"key":"16_CR73","doi-asserted-by":"crossref","unstructured":"Wen, B., Bekris, K.E.: Bundletrack: 6D pose tracking for novel objects without instance or category-level 3D models. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (2021)","DOI":"10.1109\/IROS51168.2021.9635991"},{"key":"16_CR74","doi-asserted-by":"crossref","unstructured":"Wen, B., et al.: BundleSDF: neural 6-DoF tracking and 3d reconstruction of unknown objects. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00066"},{"key":"16_CR75","doi-asserted-by":"crossref","unstructured":"Wen, Y., et al.: Disp6D: disentangled implicit shape and pose learning for scalable 6d pose estimation. In: European Conference on Computer Vision, pp. 404\u2013421. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-20077-9_24"},{"key":"16_CR76","unstructured":"Xiao, Y., Qiu, X., Langlois, P., Aubry, M., Marlet, R.: Pose from shape: deep pose estimation for arbitrary 3D objects. In: British Machine Vision Conference (BMVC) (2019)"},{"key":"16_CR77","doi-asserted-by":"crossref","unstructured":"Yang, N., Stumberg, L.v., Wang, R., Cremers, D.: D3VO: deep depth, deep pose and deep uncertainty for monocular visual odometry. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1281\u20131292 (2020)","DOI":"10.1109\/CVPR42600.2020.00136"},{"key":"16_CR78","doi-asserted-by":"crossref","unstructured":"Yen-Chen, L., Florence, P., Barron, J.T., Rodriguez, A., Isola, P., Lin, T.-Y.: Inerf: inverting neural radiance fields for pose estimation (2021)","DOI":"10.1109\/IROS51168.2021.9636708"},{"key":"16_CR79","doi-asserted-by":"crossref","unstructured":"Yisheng, H., Yao, W., Haoqiang, F., Qifeng, C., Jian, S.: FS6D: few-shot 6d pose estimation of novel objects. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.00669"},{"key":"16_CR80","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Learning two-view correspondences and geometry using order-aware network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5845\u20135854 (2019)","DOI":"10.1109\/ICCV.2019.00594"},{"key":"16_CR81","doi-asserted-by":"crossref","unstructured":"Zhang, J.Y., Ramanan, D., Tulsiani, S.: Relpose: predicting probabilistic relative rotation for single objects in the wild. In: European Conference on Computer Vision, pp. 592\u2013611. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-19821-2_34"},{"key":"16_CR82","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"16_CR83","doi-asserted-by":"crossref","unstructured":"Zhao, C., Hu, Y., Salzmann, M.: Fusing local similarities for retrieval-based 3D orientation estimation of unseen objects (2022)","DOI":"10.1007\/978-3-031-19769-7_7"},{"key":"16_CR84","doi-asserted-by":"crossref","unstructured":"Zhao, C., Hu, Y., Salzmann, M.: Locposenet: robust location prior for unseen object pose estimation (2023)","DOI":"10.1109\/3DV62453.2024.00080"},{"key":"16_CR85","unstructured":"Zhao, C., Zhang, T., Salzmann, M.: 3D-aware hypothesis & verification for generalizable relative object pose estimation. In: Proceedings of the International Conference on Learning Representations (2024)"}],"container-title":["Lecture Notes in Computer Science","Computational Visual Media"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-5812-1_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T18:45:41Z","timestamp":1745606741000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-5812-1_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819658114","9789819658121"],"references-count":85,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-5812-1_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"26 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CVM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Visual Media","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hong Kong SAR","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cvm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iccvm.org\/2025\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}