{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T13:13:22Z","timestamp":1778332402776,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755466","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"4484-4493","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Regist3R: Incremental Registration with Stereo Foundation Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7715-4698","authenticated-orcid":false,"given":"Sidun","family":"Liu","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2218-6483","authenticated-orcid":false,"given":"Wenyu","family":"Li","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6752-7892","authenticated-orcid":false,"given":"Peng","family":"Qiao","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1256-8934","authenticated-orcid":false,"given":"Yong","family":"Dou","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology National Key Laboratory of Parallel and Distributed Computing, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0902-9"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2001269.2001293"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00619"},{"key":"e_1_3_2_1_4_1","volume-title":"Arkitscenes: A diverse real-world dataset for 3d indoor scene understanding using mobile rgb-d data. arXiv preprint arXiv:2111.08897","author":"Baruch Gilad","year":"2021","unstructured":"Gilad Baruch, Zhuoyuan Chen, Afshin Dehghan, Tal Dimry, Yuri Feigin, Peter Fu, Thomas Gebauer, Brandon Joffe, Daniel Kurz, Arik Schwartz, et al. 2021. Arkitscenes: A diverse real-world dataset for 3d indoor scene understanding using mobile rgb-d data. arXiv preprint arXiv:2111.08897 (2021)."},{"key":"e_1_3_2_1_5_1","volume-title":"European Conference on Computer Vision. Springer, 421--440","author":"Brachmann Eric","year":"2024","unstructured":"Eric Brachmann, Jamie Wynn, Shuai Chen, Tommaso Cavallari, \u00c1ron Monszpart, Daniyar Turmukhambetov, and Victor Adrian Prisacariu. 2024. Scene coordinate reconstruction: Posing of image collections via incremental learning of a relocalizer. In European Conference on Computer Vision. Springer, 421--440."},{"key":"e_1_3_2_1_6_1","volume-title":"MUSt3R: Multi-view Network for Stereo 3D Reconstruction. arXiv preprint arXiv:2503.01661","author":"Cabon Yohann","year":"2025","unstructured":"Yohann Cabon, Lucas Stoffl, Leonid Antsfeld, Gabriela Csurka, Boris Chidlovskii, Jerome Revaud, and Vincent Leroy. 2025. MUSt3R: Multi-view Network for Stereo 3D Reconstruction. arXiv preprint arXiv:2503.01661 (2025)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3139681"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.257"},{"key":"e_1_3_2_1_9_1","volume-title":"MASt3R-SfM: a Fully-Integrated Solution for Unconstrained Structure-from-Motion. arXiv preprint arXiv:2409.19152","author":"Duisterhof Bardienus","year":"2024","unstructured":"Bardienus Duisterhof, Lojze Zust, Philippe Weinzaepfel, Vincent Leroy, Yohann Cabon, and Jerome Revaud. 2024. MASt3R-SfM: a Fully-Integrated Solution for Unconstrained Structure-from-Motion. arXiv preprint arXiv:2409.19152 (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00828"},{"key":"e_1_3_2_1_11_1","volume-title":"Light3R-SfM: Towards Feed-forward Structure-from-Motion. arXiv preprint arXiv:2501.14914","author":"Elflein Sven","year":"2025","unstructured":"Sven Elflein, Qunjie Zhou, S\u00e9rgio Agostinho, and Laura Leal-Taix\u00e9. 2025. Light3R-SfM: Towards Feed-forward Structure-from-Motion. arXiv preprint arXiv:2501.14914 (2025)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02056"},{"key":"e_1_3_2_1_13_1","volume-title":"Building Rome with Convex Optimization. arXiv preprint arXiv:2502.04640","author":"Han Haoyu","year":"2025","unstructured":"Haoyu Han and Heng Yang. 2025. Building Rome with Convex Optimization. arXiv preprint arXiv:2502.04640 (2025)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02040"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV50981.2020.00015"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.66"},{"key":"e_1_3_2_1_17_1","volume-title":"European Conference on Computer Vision. Springer, 71--91","author":"Leroy Vincent","year":"2024","unstructured":"Vincent Leroy, Yohann Cabon, and J\u00e9r\u00f4me Revaud. 2024. Grounding image matching in 3d with mast3r. In European Conference on Computer Vision. Springer, 71--91."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-025-03333-0"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00218"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00593"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.403"},{"key":"e_1_3_2_1_23_1","volume-title":"International Workshop on Reproducible Research in Pattern Recognition. Springer, 60--74","author":"Moulon Pierre","year":"2016","unstructured":"Pierre Moulon, Pascal Monasse, Romuald Perrot, and Renaud Marlet. 2016. OpenMVG: Open multiple view geometry. In International Workshop on Reproducible Research in Pattern Recognition. Springer, 60--74."},{"key":"e_1_3_2_1_24_1","volume-title":"Orb-slam2: An open-source slam system for monocular, stereo, and rgb-d cameras","author":"Mur-Artal Raul","year":"2017","unstructured":"Raul Mur-Artal and Juan D Tard\u00f3s. 2017. Orb-slam2: An open-source slam system for monocular, stereo, and rgb-d cameras. IEEE transactions on robotics 33, 5 (2017), 1255--1262."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298883"},{"key":"e_1_3_2_1_26_1","volume-title":"European Conference on Computer Vision. Springer, 58--77","author":"Pan Linfei","year":"2024","unstructured":"Linfei Pan, D\u00e1niel Bar\u00e1th, Marc Pollefeys, and Johannes L Sch\u00f6nberger. 2024. Global structure-from-motion revisited. In European Conference on Computer Vision. Springer, 58--77."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00074"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.377"},{"key":"e_1_3_2_1_32_1","volume-title":"Flowmap: High-quality camera poses, intrinsics, and depth via gradient descent. arXiv preprint arXiv:2404.15259","author":"Smith Cameron","year":"2024","unstructured":"Cameron Smith, David Charatan, Ayush Tewari, and Vincent Sitzmann. 2024. Flowmap: High-quality camera poses, intrinsics, and depth via gradient descent. arXiv preprint arXiv:2404.15259 (2024)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Noah Snavely Steven M Seitz and Richard Szeliski. 2006. Photo tourism: exploring photo collections in 3D. In ACM siggraph 2006 papers. 835--846.","DOI":"10.1145\/1141911.1141964"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0107-3"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"e_1_3_2_1_36_1","volume-title":"Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds. arXiv preprint arXiv:2412.06974","author":"Tang Zhenggang","year":"2024","unstructured":"Zhenggang Tang, Yuchen Fan, Dilin Wang, Hongyu Xu, Rakesh Ranjan, Alexander Schwing, and Zhicheng Yan. 2024. MV-DUSt3R+: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds. arXiv preprint arXiv:2412.06974 (2024)."},{"key":"e_1_3_2_1_37_1","volume-title":"Droid-slam: Deep visual slam for monocular, stereo, and rgb-d cameras. Advances in neural information processing systems 34","author":"Teed Zachary","year":"2021","unstructured":"Zachary Teed and Jia Deng. 2021. Droid-slam: Deep visual slam for monocular, stereo, and rgb-d cameras. Advances in neural information processing systems 34 (2021), 16558--16569."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.177"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.88573"},{"key":"e_1_3_2_1_40_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_41_1","volume-title":"3d reconstruction with spatial memory. arXiv preprint arXiv:2408.16061","author":"Wang Hengyi","year":"2024","unstructured":"Hengyi Wang and Lourdes Agapito. 2024. 3d reconstruction with spatial memory. arXiv preprint arXiv:2408.16061 (2024)."},{"key":"e_1_3_2_1_42_1","volume-title":"VGGT: Visual Geometry Grounded Transformer. arXiv preprint arXiv:2503.11651","author":"Wang Jianyuan","year":"2025","unstructured":"Jianyuan Wang, Minghao Chen, Nikita Karaev, Andrea Vedaldi, Christian Rupprecht, and David Novotny. 2025. VGGT: Visual Geometry Grounded Transformer. arXiv preprint arXiv:2503.11651 (2025)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02049"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00896"},{"key":"e_1_3_2_1_45_1","volume-title":"Moge: Unlocking accurate monocular geometry estimation for open-domain images with optimal training supervision. arXiv preprint arXiv:2410.19115","author":"Wang Ruicheng","year":"2024","unstructured":"Ruicheng Wang, Sicheng Xu, Cassie Dai, Jianfeng Xiang, Yu Deng, Xin Tong, and Jiaolong Yang. 2024. Moge: Unlocking accurate monocular geometry estimation for open-domain images with optimal training supervision. arXiv preprint arXiv:2410.19115 (2024)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10578-9_5"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2013.25"},{"key":"e_1_3_2_1_49_1","volume-title":"Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass. arXiv preprint arXiv:2501.13928","author":"Yang Jianing","year":"2025","unstructured":"Jianing Yang, Alexander Sax, Kevin J Liang, Mikael Henaff, Hao Tang, Ang Cao, Joyce Chai, Franziska Meier, and Matt Feiszli. 2025. Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass. arXiv preprint arXiv:2501.13928 (2025)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00186"},{"key":"e_1_3_2_1_51_1","volume-title":"Chinese Conference on Pattern Recognition and Computer Vision (PRCV). Springer, 580--592","author":"Ye Zongxin","year":"2024","unstructured":"Zongxin Ye, Wenyu Li, Sidun Liu, Peng Qiao, and Yong Dou. 2024. ER-SFM: Efficient and Robust Cluster-Based Structure from Motion. In Chinese Conference on Pattern Recognition and Computer Vision (PRCV). Springer, 580--592."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"e_1_3_2_1_53_1","volume-title":"Proceedings, Part VI 14","author":"Yi Kwang Moo","year":"2016","unstructured":"Kwang Moo Yi, Eduard Trulls, Vincent Lepetit, and Pascal Fua. 2016. Lift: Learned invariant feature transform. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part VI 14. Springer, 467--483."},{"key":"e_1_3_2_1_54_1","volume-title":"Cameras as rays: Pose estimation via ray diffusion. arXiv preprint arXiv:2402.14817","author":"Zhang Jason Y","year":"2024","unstructured":"Jason Y Zhang, Amy Lin, Moneish Kumar, Tzu-Hsuan Yang, Deva Ramanan, and Shubham Tulsiani. 2024. Cameras as rays: Pose estimation via ray diffusion. arXiv preprint arXiv:2402.14817 (2024)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_31"},{"key":"e_1_3_2_1_56_1","volume-title":"Parallel structure from motion from local increment to global averaging. arXiv preprint arXiv:1702.08601","author":"Zhu Siyu","year":"2017","unstructured":"Siyu Zhu, Tianwei Shen, Lei Zhou, Runze Zhang, Jinglu Wang, Tian Fang, and Long Quan. 2017. Parallel structure from motion from local increment to global averaging. arXiv preprint arXiv:1702.08601 (2017)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755466","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:22:26Z","timestamp":1765308146000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755466"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":56,"alternative-id":["10.1145\/3746027.3755466","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755466","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}