{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:05:08Z","timestamp":1765339508472,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":84,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["61902415"],"award-info":[{"award-number":["61902415"]}]},{"name":"Science and Technology on Parallel and Distributed Processing Laboratory","award":["WDZC20235250106"],"award-info":[{"award-number":["WDZC20235250106"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755722","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:55:00Z","timestamp":1761375300000},"page":"11081-11090","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Mono3R: Exploiting Monocular Cues for Geometric 3D Reconstruction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2218-6483","authenticated-orcid":false,"given":"Wenyu","family":"Li","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7715-4698","authenticated-orcid":false,"given":"Sidun","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6752-7892","authenticated-orcid":false,"given":"Peng","family":"Qiao","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1256-8934","authenticated-orcid":false,"given":"Yong","family":"Dou","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2006.21"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15552-9_3"},{"key":"e_1_3_2_1_3_1","volume-title":"Presence: Teleoperators and Virtual Environments","volume":"6","author":"Azuma Ronald","year":"1997","unstructured":"Ronald Azuma. 1997. A Survey of Augmented Reality. Presence: Teleoperators and Virtual Environments, Vol. 6, 4 (1997)."},{"volume-title":"ISPRS Congress and Exhibition.","author":"Baillard C.","key":"e_1_3_2_1_4_1","unstructured":"C. Baillard and A. Zisserman. 2000. A Plane-Sweep Strategy for the 3D Reconstruction of Buildings from Multiple Images. In ISPRS Congress and Exhibition."},{"key":"e_1_3_2_1_5_1","volume-title":"Delving deeper into convolutional networks for learning video representations. arXiv preprint arXiv:1511.06432","author":"Ballas Nicolas","year":"2015","unstructured":"Nicolas Ballas, Li Yao, Chris Pal, and Aaron Courville. 2015. Delving deeper into convolutional networks for learning video representations. arXiv preprint arXiv:1511.06432 (2015)."},{"key":"e_1_3_2_1_6_1","article-title":"PatchMatch: a randomized correspondence algorithm for structural image editing","volume":"28","author":"Barnes Connelly","year":"2009","unstructured":"Connelly Barnes, Eli Shechtman, Adam Finkelstein, and Dan B. Goldman. 2009. PatchMatch: a randomized correspondence algorithm for structural image editing. ACM Transaction on Graphics (Proc. SIGGRAPH), Vol. 28, 3 (2009).","journal-title":"ACM Transaction on Graphics (Proc. SIGGRAPH)"},{"key":"e_1_3_2_1_7_1","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1). https:\/\/openreview.net\/forum?id=tjZjv_qh_CE","author":"Baruch Gilad","year":"2021","unstructured":"Gilad Baruch, Zhuoyuan Chen, Afshin Dehghan, Tal Dimry, Yuri Feigin, Peter Fu, Thomas Gebauer, Brandon Joffe, Daniel Kurz, Arik Schwartz, and Elad Shulman. 2021. ARKitScenes - A Diverse Real-World Dataset for 3D Indoor Scene Understanding Using Mobile RGB-D Data. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1). https:\/\/openreview.net\/forum?id=tjZjv_qh_CE"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.281"},{"volume-title":"Designing autonomous agents","author":"Brooks R. A.","key":"e_1_3_2_1_9_1","unstructured":"R. A. Brooks. 1991. Elephants don't play chess. In Designing autonomous agents, P. Maes (Ed.). Bradford Books, MIT Press, Cambridge."},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. MCBR-CDS.","author":"Burner Andreas","year":"2011","unstructured":"Andreas Burner, Rene Donner, Marius Mayerhoefer, Markus Holzer, Franz Kainberger, and Georg Langs. 2011. Texture Bags: Anomaly Retrieval in Medical Images Based on Local 3D-Texture Similarity. In Proc. MCBR-CDS."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_2"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.257"},{"key":"e_1_3_2_1_13_1","volume-title":"SuperPoint: Self-Supervised Interest Point Detection and Description. arXiv","author":"Daniel DeTone","year":"2017","unstructured":"DeTone Daniel, Malisiewicz Tomasz, and Rabinovich Andrew. 2017. SuperPoint: Self-Supervised Interest Point Detection and Description. arXiv, Vol. 1712.07629 (2017)."},{"key":"e_1_3_2_1_14_1","first-page":"175","volume-title":"Denmark","author":"Deutscher Jonathan","year":"2002","unstructured":"Jonathan Deutscher, Michael Isard, and John MacCormick. 2002. Automatic camera calibration from a single manhattan image. In Computer Vision-ECCV 2002: 7th European Conference on Computer Vision Copenhagen, Denmark, May 28-31, 2002 Proceedings, Part IV 7. Springer, 175-188."},{"key":"e_1_3_2_1_15_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=YicbFdNTTy","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"e_1_3_2_1_16_1","volume-title":"MASt3R-SfM: a Fully-Integrated Solution for Unconstrained Structure-from-Motion. arXiv","author":"Duisterhof Bardienus","year":"1915","unstructured":"Bardienus Duisterhof, Lojze Zust, Philippe Weinzaepfel, Vincent Leroy, Yohann Cabon, and Jerome Revaud. 2024. MASt3R-SfM: a Fully-Integrated Solution for Unconstrained Structure-from-Motion. arXiv, Vol. 2409.19152 (2024)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539802"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00250"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.59"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073599"},{"key":"e_1_3_2_1_22_1","volume-title":"Grounding Image Matching in 3D with MASt3R. arXiv preprint arXiv:2406.09756","author":"Leroy Vincent","year":"2024","unstructured":"Vincent Leroy, Yohann Cabon, and J\u00e9r\u00f4me Revaud. 2024. Grounding Image Matching in 3D with MASt3R. arXiv preprint arXiv:2406.09756 (2024)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00218"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00032"},{"key":"e_1_3_2_1_25_1","volume-title":"Freeman","author":"Liu Ce","year":"2008","unstructured":"Ce Liu, Jenny Yuen, Antonio Torralba, Josef Sivic, and William T. Freeman. 2008. SIFT Flow: Dense Correspondence across Different Scenes. In Proc. ECCV."},{"key":"e_1_3_2_1_26_1","unstructured":"D. Lowe. 2007. Implementation of the Scale Invariant Feature Transform. http:\/\/www.cs.ubc.ca\/ lowe\/keypoints\/."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.438"},{"key":"e_1_3_2_1_28_1","volume-title":"DINOv2: Learning Robust Visual Features without Supervision. Transactions on Machine Learning Research","author":"Oquab Maxime","year":"2024","unstructured":"Maxime Oquab, Timoth\u00e9e Darcet, Th\u00e9o Moutakanni, Huy V. Vo, Marc Szafraniec, Vasil Khalidov, Pierre Fernandez, Daniel HAZIZA, Francisco Massa, Alaaeldin El-Nouby, Mido Assran, Nicolas Ballas, Wojciech Galuba, Russell Howes, Po-Yao Huang, Shang-Wen Li, Ishan Misra, Michael Rabbat, Vasu Sharma, Gabriel Synnaeve, Hu Xu, Herve Jegou, Julien Mairal, Patrick Labatut, Armand Joulin, and Piotr Bojanowski. 2024. DINOv2: Learning Robust Visual Features without Supervision. Transactions on Machine Learning Research (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Luc Van Gool, and Fisher Yu","author":"Piccinelli Luigi","year":"2024","unstructured":"Luigi Piccinelli, Yung-Hsu Yang, Christos Sakaridis, Mattia Segu, Siyuan Li, Luc Van Gool, and Fisher Yu. 2024. UniDepth: Universal Monocular Metric Depth Estimation. In Proc. CVPR."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01086"},{"key":"e_1_3_2_1_31_1","volume-title":"Fallon","author":"Ramezani Milad","year":"2022","unstructured":"Milad Ramezani, Mat\u00edas Mattamala, and Maurice F. Fallon. 2022. AEROS: AdaptivE RObust Least-Squares for Graph-Based SLAM. Frontiers Robotics AI, Vol. 9 (2022)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.440"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2461466.2461486"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00239"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"e_1_3_2_1_38_1","volume-title":"Fleet","author":"Saxena Saurabh","year":"2023","unstructured":"Saurabh Saxena, Charles Herrmann, Junhwa Hur, Abhishek Kar, Mohammad Norouzi, Deqing Sun, and David J. Fleet. 2023. The Surprising Effectiveness of Diffusion Models for Optical Flow and Monocular Depth Estimation. arXiv.cs, Vol. abs\/2306.01923 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Structure-from-Motion Revisited. In Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Sch\u00f6nberger Johannes Lutz","year":"2016","unstructured":"Johannes Lutz Sch\u00f6nberger and Jan-Michael Frahm. 2016a. Structure-from-Motion Revisited. In Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_40_1","volume-title":"Structure-from-Motion Revisited. In Proc. CVPR.","author":"Sch\u00f6nberger Johannes Lutz","year":"2016","unstructured":"Johannes Lutz Sch\u00f6nberger and Jan-Michael Frahm. 2016b. Structure-from-Motion Revisited. In Proc. CVPR."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_31"},{"key":"e_1_3_2_1_42_1","volume-title":"Pixelwise View Selection for Unstructured Multi-View Stereo. In European Conference on Computer Vision (ECCV).","author":"Sch\u00f6nberger Johannes Lutz","year":"2016","unstructured":"Johannes Lutz Sch\u00f6nberger, Enliang Zheng, Marc Pollefeys, and Jan-Michael Frahm. 2016b. Pixelwise View Selection for Unstructured Multi-View Stereo. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.272"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.377"},{"volume-title":"Australasian Conf. on Robotics and Automation.","author":"Silpa-Anan C.","key":"e_1_3_2_1_45_1","unstructured":"C. Silpa-Anan and R. Hartley. 2004. Localization using an image-map. In Australasian Conf. on Robotics and Automation."},{"volume-title":"Manual of Photogrammetry","author":"Slama C.","key":"e_1_3_2_1_46_1","unstructured":"C. Slama. 1980. Manual of Photogrammetry. American Society of Photogrammetry."},{"volume-title":"Proc","author":"Smith R. A.","key":"e_1_3_2_1_47_1","unstructured":"R. A. Smith, Andrew W. Fitzgibbon, and Andrew Zisserman. 1999. Improving Augmented Reality using Image and Scene Constraints. In Proc. BMVC. BMVA Press."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385773"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00881"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"e_1_3_2_1_51_1","volume-title":"Ba-net: Dense bundle adjustment network. arXiv preprint arXiv:1806.04807","author":"Tang Chengzhou","year":"2018","unstructured":"Chengzhou Tang and Ping Tan. 2018. Ba-net: Dense bundle adjustment network. arXiv preprint arXiv:1806.04807 (2018)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"e_1_3_2_1_53_1","volume-title":"Le","author":"Teo Choon Hui","year":"2010","unstructured":"Choon Hui Teo, S. V. N. Vishwanathan, Alexander J. Smola, and Quoc V. Le. 2010. Bundle Methods for Regularized Risk Minimization. JMLR, Vol. 11 (2010)."},{"volume-title":"Proc. of the Fifth Int'l Workshop on Algorithmic Foundations of Robotics.","author":"Thrun S.","key":"e_1_3_2_1_54_1","unstructured":"S. Thrun, D. Koller, Z. Ghahmarani, and H. Durrant-Whyte. 2002. SLAM Updates Require Constant Time. In Proc. of the Fifth Int'l Workshop on Algorithmic Foundations of Robotics."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1996.517170"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1999.791231"},{"volume-title":"Proc. ICCV Workshop.","author":"Triggs Bill","key":"e_1_3_2_1_57_1","unstructured":"Bill Triggs, Philip F. McLauchlan, Richard I. Hartley, and Andrew W. Fitzgibbon. 2000. Bundle Adjustment - A Modern Synthesis. In Proc. ICCV Workshop."},{"key":"e_1_3_2_1_58_1","volume-title":"Huang","author":"Tsai Roger Y.","year":"1984","unstructured":"Roger Y. Tsai and Thomas S. Huang. 1984. Uniqueness and Estimation of Three-Dimensional Motion Parameters of Rigid Objects with Curved Surfaces. PAMI, Vol. 6, 1 (1984)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"J.-R. Tsay and M.-S. Lee. 2012. SIFT For Dense Point Cloud Matching And Aero Triangulation. In International Archives of the Photogrammetry .","DOI":"10.5194\/isprsarchives-XXXIX-B3-69-2012"},{"key":"e_1_3_2_1_60_1","volume-title":"Jain","author":"T\u00fcceryan Mihran","year":"1998","unstructured":"Mihran T\u00fcceryan and Anil K. Jain. 1998. Texture Analysis. In The Handbook of Pattern Recognition and Computer Vision (2nd Edition). World Scientific Publishing Co."},{"key":"e_1_3_2_1_61_1","volume-title":"Van Gool","author":"Tuytelaars T.","year":"1999","unstructured":"T. Tuytelaars, M. Vergauwen, M. Pollefeys, and Luc J. Van Gool. 1999. Image Matching for Wide baseline Stereo. In Int'l Conf. on Forensic Human Identification."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.88573"},{"key":"e_1_3_2_1_63_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_64_1","article-title":"DM-VIO: Delayed Marginalization Visual-Inertial Odometry","volume":"7","author":"von Stumberg Lukas","year":"2022","unstructured":"Lukas von Stumberg and Daniel Cremers. 2022. DM-VIO: Delayed Marginalization Visual-Inertial Odometry. IEEE Robotics Autom. Lett., Vol. 7, 2 (2022).","journal-title":"IEEE Robotics Autom. Lett."},{"key":"e_1_3_2_1_65_1","volume-title":"3D Reconstruction with Spatial Memory. arXiv preprint arXiv:2408.16061","author":"Wang Hengyi","year":"2024","unstructured":"Hengyi Wang and Lourdes Agapito. 2024. 3D Reconstruction with Spatial Memory. arXiv preprint arXiv:2408.16061 (2024)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02049"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"crossref","unstructured":"Qianqian Wang Yifei Zhang Aleksander Holynski Alexei A. Efros and Angjoo Kanazawa. 2025. Continuous 3D Perception Model with Persistent State. showeprintarXiv:2501.12387","DOI":"10.1109\/CVPR52734.2025.00983"},{"key":"e_1_3_2_1_68_1","volume-title":"MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision. arXiv","author":"Wang Ruicheng","year":"1911","unstructured":"Ruicheng Wang, Sicheng Xu, Cassie Dai, Jianfeng Xiang, Yu Deng, Xin Tong, and Jiaolong Yang. 2024c. MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision. arXiv, Vol. 2410.19115 (2024)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"e_1_3_2_1_70_1","volume-title":"Deepsfm: Structure from motion via deep bundle adjustment. In Computer Vision-ECCV 2020: 16th European Conference","author":"Wei Xingkui","year":"2020","unstructured":"Xingkui Wei, Yinda Zhang, Zhuwen Li, Yanwei Fu, and Xiangyang Xue. 2020a. Deepsfm: Structure from motion via deep bundle adjustment. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part I 16. Springer, 230-247."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_14"},{"key":"e_1_3_2_1_72_1","volume-title":"BundleSDF: Neural 6-DoF Tracking and 3D Reconstruction of Unknown Objects. arXiv.cs","author":"Wen Bowen","year":"2023","unstructured":"Bowen Wen, Jonathan Tremblay, Valts Blukis, Stephen Tyree, Thomas M\u00fcller, Alex Evans, Dieter Fox, Jan Kautz, and Stan Birchfield. 2023. BundleSDF: Neural 6-DoF Tracking and 3D Reconstruction of Unknown Objects. arXiv.cs, Vol. abs\/2303.14158 (2023)."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00840"},{"key":"e_1_3_2_1_74_1","volume-title":"GS-SLAM: Dense Visual SLAM with 3D Gaussian Splatting. arXiv.cs","author":"Yan Chi","year":"2024","unstructured":"Chi Yan, Delin Qu, Dong Wang, Dan Xu, Zhigang Wang, Bin Zhao, and Xuelong Li. 2024. GS-SLAM: Dense Visual SLAM with 3D Gaussian Splatting. arXiv.cs (2024)."},{"key":"e_1_3_2_1_75_1","volume-title":"Fast3R: Towards 3D Reconstruction of 1000 Images in One Forward Pass. arXiv preprint arXiv:2501.13928","author":"Yang Jianing","year":"2025","unstructured":"Jianing Yang, Alexander Sax, Kevin J Liang, Mikael Henaff, Hao Tang, Ang Cao, Joyce Chai, Franziska Meier, and Matt Feiszli. 2025. Fast3R: Towards 3D Reconstruction of 1000 Images in One Forward Pass. arXiv preprint arXiv:2501.13928 (2025)."},{"key":"e_1_3_2_1_76_1","volume-title":"Depth Anything V2. arXiv:2406.09414","author":"Yang Lihe","year":"2024","unstructured":"Lihe Yang, Bingyi Kang, Zilong Huang, Zhen Zhao, Xiaogang Xu, Jiashi Feng, and Hengshuang Zhao. 2024. Depth Anything V2. arXiv:2406.09414 (2024)."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"crossref","unstructured":"Yao Yao Zixin Luo Shiwei Li Tian Fang and Long Quan. 2018a. MVSNet: Depth Inference for Unstructured Multi-view Stereo. In ECCV.","DOI":"10.1007\/978-3-030-01237-3_47"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_47"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00186"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_28"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00027"},{"key":"e_1_3_2_1_83_1","volume-title":"MonST3R: A Simple Approach for Estimating Geometry in the Presence of Motion. arXiv","author":"Zhang Junyi","year":"2024","unstructured":"Junyi Zhang, Charles Herrmann, Junhwa Hur, Varun Jampani, Trevor Darrell, Forrester Cole, Deqing Sun, and Ming-Hsuan Yang. 2024. MonST3R: A Simple Approach for Estimating Geometry in the Presence of Motion. arXiv, Vol. 2410.03825 (2024)."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"crossref","unstructured":"Zhe Zhang Rui Peng Yuxi Hu and Ronggang Wang. 2023. GeoMVSNet: Learning Multi-View Stereo with Geometry Perception. In CVPR.","DOI":"10.1109\/CVPR52729.2023.02060"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755722","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:01:54Z","timestamp":1765339314000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755722"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":84,"alternative-id":["10.1145\/3746027.3755722","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755722","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}