{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T17:19:57Z","timestamp":1778347197035,"version":"3.51.4"},"publisher-location":"Cham","reference-count":126,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732195","type":"print"},{"value":"9783031732201","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73220-1_5","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T20:02:26Z","timestamp":1730577746000},"page":"71-91","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":209,"title":["Grounding Image Matching in\u00a03D with\u00a0MASt3R"],"prefix":"10.1007","author":[{"given":"Vincent","family":"Leroy","sequence":"first","affiliation":[]},{"given":"Yohann","family":"Cabon","sequence":"additional","affiliation":[]},{"given":"Jerome","family":"Revaud","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"5_CR1","unstructured":"Scipy. https:\/\/docs.scipy.org\/doc\/scipy"},{"key":"5_CR2","unstructured":"RGBD Objects in the Wild: Scaling Real-World 3D Object Learning from RGB-D Videos (2024). http:\/\/arxiv.org\/abs\/2401.12592. arXiv:2401.12592"},{"key":"5_CR3","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/s11263-016-0902-9","volume":"120","author":"H Aan\u00e6s","year":"2016","unstructured":"Aan\u00e6s, H., Jensen, R.R., Vogiatzis, G., Tola, E., Dahl, A.B.: Large-scale data for multiple-view stereopsis. IJCV 120, 153\u2013168 (2016)","journal-title":"IJCV"},{"key":"5_CR4","unstructured":"Addison, H., Eduard, T., etru1927, Kwang\u00a0Moo, Y., old ufo, Sohier, D., Yuhe, J.: Image matching challenge 2022 (2022). https:\/\/kaggle.com\/competitions\/image-matching-challenge-2022"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Arnold, E., et al.: Map-free visual relocalization: metric pose relative to a single image. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19769-7_40"},{"key":"5_CR6","unstructured":"Ashley, C., et al.: Image matching challenge 2023 (2023). https:\/\/kaggle.com\/competitions\/image-matching-challenge-2023"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Balntas, V., Lenc, K., Vedaldi, A., Mikolajczyk, K.: HPatches: a benchmark and evaluation of handcrafted and learned local descriptors. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.410"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Barroso-Laguna, A., Munukutla, S., Prisacariu, V.A., Brachmann, E.: Matching 2D images in 3D: metric relative pose from metric correspondences. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00464"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Barroso-Laguna, A., Riba, E., Ponsa, D., Mikolajczyk, K.: Key.Net: keypoint detection by handcrafted and learned CNN filters. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00593"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"Bhalgat, Y., Henriques, J.F., Zisserman, A.: A light touch approach to teaching transformers multi-view geometry. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, 17\u201324 June 2023 (2023)","DOI":"10.1109\/CVPR52729.2023.00480"},{"key":"5_CR11","doi-asserted-by":"crossref","unstructured":"Bhowmik, A., Gumhold, S., Rother, C., Brachmann, E.: Reinforced feature points: optimizing feature detection and description for a high-level task. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00500"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"B\u00f6kman, G., Kahl, F.: A case for using rotation invariant features in state of the art feature matchers. In: CVPRW (2022)","DOI":"10.1109\/CVPRW56347.2022.00559"},{"key":"5_CR13","unstructured":"Cabon, Y., Murray, N., Humenberger, M.: Virtual KITTI 2. CoRR abs\/2001.10773 (2020)"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Campbell, N.D.F., Vogiatzis, G., Hern\u00e1ndez, C., Cipolla, R.: Using multiple hypotheses to improve depth-maps for multi-view stereo. In: ECCV (2008)","DOI":"10.1007\/978-3-540-88682-2_58"},{"key":"5_CR15","doi-asserted-by":"crossref","unstructured":"Campos, C., Elvira, R., Rodr\u00ed\u00adguez, J.J.G., Montiel, J.M., Tard\u00f3s, J.D.: Orb-slam3: an accurate open-source library for visual, visual\u2013inertial, and multimap slam. IEEE Trans. Robot. 37(6), 1874\u20131890 (2021)","DOI":"10.1109\/TRO.2021.3075644"},{"key":"5_CR16","unstructured":"Chaplot, D.S., Gandhi, D., Gupta, S., Gupta, A., Salakhutdinov, R.: Learning to explore using active neural slam. arXiv preprint arXiv:2004.05155 (2020)"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Chen, H., et al.: Aspanformer: detector-free image matching with adaptive span transformer. In: European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-19824-3_2"},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Cheng, S., et al.: Deep stereo using adaptive thin volume representation with uncertainty awareness. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00260"},{"key":"5_CR19","unstructured":"Csurka, G., Dance, C., Humenberger, M.: From Handcrafted to Deep Local Invariant Features. arXiv:1807.10254 (2018)"},{"key":"5_CR20","unstructured":"Dehghan, A., et al.: ARKitScenes: a diverse real-world dataset for 3D indoor scene understanding using mobile RGB-D data. In: NeurIPS Datasets and Benchmarks (2021)"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"DeTone, D., Malisiewicz, T., Rabinovich, A.: Superpoint: self-supervised interest point detection and description. In: CVPR (2018)","DOI":"10.1109\/CVPRW.2018.00060"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Ding, Y., et al.: Transmvsnet: global context-aware multi-view stereo network with transformers. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00839"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Dong, Q., Cao, C., Fu, Y.: Rethinking optical flow from geometric matching consistent perspective. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00135"},{"key":"5_CR24","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. ICLR (2021)"},{"key":"5_CR25","unstructured":"Douze, M., et al.: The faiss library (2024)"},{"key":"5_CR26","unstructured":"Duda, R., Hart, P., Stork, D.G.: Pattern Classification (2001)"},{"key":"5_CR27","doi-asserted-by":"publisher","unstructured":"Dusmanu, M., et al.: D2-net: a trainable CNN for joint description and detection of local features. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2019, Long Beach, CA, USA, 16\u201320 June 2019, pp. 8092\u20138101. Computer Vision Foundation\/IEEE (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00828. http:\/\/openaccess.thecvf.com\/content_CVPR_2019\/html\/Dusmanu_D2-Net_A_Trainable_CNN_for_Joint_Description_and_Detection_of_CVPR_2019_paper.html","DOI":"10.1109\/CVPR.2019.00828"},{"key":"5_CR28","doi-asserted-by":"crossref","unstructured":"Edstedt, J., Athanasiadis, I., Wadenb\u00e4ck, M., Felsberg, M.: DKM: dense kernelized feature matching for geometry estimation. In: IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01704"},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Edstedt, J., Sun, Q., B\u00f6kman, G., Wadenb\u00e4ck, M., Felsberg, M.: RoMa: Robust Dense Feature Matching. arXiv preprint arXiv:2305.15404 (2023)","DOI":"10.1109\/CVPR52733.2024.01871"},{"key":"5_CR30","doi-asserted-by":"crossref","unstructured":"Efe, U., Ince, K.G., Alatan, A.: DFM: a performance baseline for deep feature matching. In: CVPRW (2021)","DOI":"10.1109\/CVPRW53098.2021.00484"},{"issue":"6","key":"5_CR31","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/358669.358692","volume":"24","author":"MA Fischler","year":"1981","unstructured":"Fischler, M.A., Bolles, R.C.: Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography. Commun. ACM 24(6), 381\u2013395 (1981). https:\/\/doi.org\/10.1145\/358669.358692","journal-title":"Commun. ACM"},{"issue":"8","key":"5_CR32","doi-asserted-by":"publisher","first-page":"1362","DOI":"10.1109\/TPAMI.2009.161","volume":"32","author":"Y Furukawa","year":"2010","unstructured":"Furukawa, Y., Ponce, J.: Accurate, dense, and robust multiview stereopsis. PAMI 32(8), 1362\u20131376 (2010)","journal-title":"PAMI"},{"key":"5_CR33","doi-asserted-by":"crossref","unstructured":"Galliani, S., Lasinger, K., Schindler, K.: Massively parallel multiview stereopsis by surface normal diffusion. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.106"},{"key":"5_CR34","doi-asserted-by":"crossref","unstructured":"Germain, H., Bourmaud, G., Lepetit, V.: S2DNet: learning image features for accurate sparse-to-dense matching. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58580-8_37"},{"key":"5_CR35","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.patrec.2014.03.023","volume":"50","author":"L Gomes","year":"2014","unstructured":"Gomes, L., Bellon, O.R.P., Silva, L.: 3D reconstruction methods for digital preservation of cultural heritage: a survey. Pattern Recognit. Lett. 50, 3\u201314 (2014)","journal-title":"Pattern Recognit. Lett."},{"key":"5_CR36","doi-asserted-by":"crossref","unstructured":"Gu, X., Fan, Z., Zhu, S., Dai, Z., Tan, F., Tan, P.: Cascade cost volume for high-resolution multi-view stereo and stereo matching. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00257"},{"key":"5_CR37","unstructured":"Hammarstrand, L., et al.: Long-Term Visual Localization Benchmark. https:\/\/www.visuallocalization.net\/"},{"key":"5_CR38","doi-asserted-by":"publisher","unstructured":"Hartley, R., Zisserman, A.: Multiple View Geometry in Computer Vision. Cambridge University Press, Cambridge (2004). https:\/\/doi.org\/10.1017\/CBO9780511811685","DOI":"10.1017\/CBO9780511811685"},{"key":"5_CR39","doi-asserted-by":"crossref","unstructured":"He, K., Lu, Y., Sclaroff, S.: Local descriptors optimized for average precision. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00069"},{"key":"5_CR40","doi-asserted-by":"crossref","unstructured":"He, Y., Yan, R., Fragkiadaki, K., Yu, S.: Epipolar transformers. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, 13\u201319 June 2020 (2020)","DOI":"10.1109\/CVPR42600.2020.00780"},{"key":"5_CR41","unstructured":"Hendrycks, D., Gimpel, K.: Bridging nonlinearities and stochastic regularizers with gaussian error linear units. CoRR abs\/1606.08415 (2016). http:\/\/arxiv.org\/abs\/1606.08415"},{"key":"5_CR42","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: Flowformer: a transformer architecture for optical flow. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19790-1_40"},{"key":"5_CR43","unstructured":"Humenberger, M., et al.: Robust image retrieval-based visual localization using kapture (2020)"},{"key":"5_CR44","doi-asserted-by":"crossref","unstructured":"Jiang, S., Campbell, D., Lu, Y., Li, H., Hartley, R.I.: Learning to estimate hidden motions with global motion aggregation (2021)","DOI":"10.1109\/ICCV48922.2021.00963"},{"key":"5_CR45","doi-asserted-by":"crossref","unstructured":"Jiang, W., Trulls, E., Hosang, J., Tagliasacchi, A., Yi, K.M.: COTR: correspondence transformer for matching across images. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00615"},{"issue":"2","key":"5_CR46","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1007\/s11263-020-01385-0","volume":"129","author":"Y Jin","year":"2020","unstructured":"Jin, Y., et al.: Image matching across wide baselines: from paper to practice. IJCV 129(2), 517\u2013547 (2020)","journal-title":"IJCV"},{"issue":"3","key":"5_CR47","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"key":"5_CR48","unstructured":"Junjie, N., et al.: Pats: patch area transportation with subdivision for local feature matching. In: CVPR (2023)"},{"key":"5_CR49","unstructured":"Kloepfer, D.A., Henriques, J.F., Campbell, D.: SCENES: Subpixel Correspondence Estimation With Epipolar Supervision (2024). http:\/\/arxiv.org\/abs\/2401.10886"},{"key":"5_CR50","doi-asserted-by":"crossref","unstructured":"Li, Z., Snavely, N.: Megadepth: learning single-view depth prediction from internet photos. In: CVPR, pp. 2041\u20132050 (2018)","DOI":"10.1109\/CVPR.2018.00218"},{"key":"5_CR51","doi-asserted-by":"crossref","unstructured":"Lin, A., Zhang, J.Y., Ramanan, D., Tulsiani, S.: Relpose++: recovering 6D poses from sparse-view observations. CoRR abs\/2305.04926 (2023)","DOI":"10.1109\/3DV62453.2024.00126"},{"key":"5_CR52","doi-asserted-by":"crossref","unstructured":"Lindenberger, P., Sarlin, P., Larsson, V., Pollefeys, M.: Pixel-perfect structure-from-motion with featuremetric refinement. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00593"},{"key":"5_CR53","doi-asserted-by":"crossref","unstructured":"Lindenberger, P., Sarlin, P., Pollefeys, M.: Lightglue: local feature matching at light speed. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01616"},{"key":"5_CR54","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"D Lowe","year":"2004","unstructured":"Lowe, D.: Distinctive image features from scale-invariant keypoints. IJCV 60, 91\u2013110 (2004)","journal-title":"IJCV"},{"key":"5_CR55","doi-asserted-by":"crossref","unstructured":"Luo, Z., et al.: Aslfeat: learning local features of accurate shape and localization. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00662"},{"issue":"1","key":"5_CR56","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1007\/s11263-020-01359-2","volume":"129","author":"J Ma","year":"2021","unstructured":"Ma, J., Jiang, X., Fan, A., Jiang, J., Yan, J.: Image matching from handcrafted to deep features: a survey. IJCV 129(1), 23\u201379 (2021)","journal-title":"IJCV"},{"key":"5_CR57","doi-asserted-by":"crossref","unstructured":"Ma, Z., Teed, Z., Deng, J.: Multiview stereo with cascaded epipolar raft. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19821-2_42"},{"key":"5_CR58","unstructured":"Maneewongvatana, S., Mount, D.M.: Analysis of approximate nearest neighbor searching with clustered point sets. In: DIMACS. DIMACS Series in Discrete Mathematics and Theoretical Computer Science (1999)"},{"key":"5_CR59","doi-asserted-by":"crossref","unstructured":"Mayer, N., et al.: A large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.438"},{"key":"5_CR60","doi-asserted-by":"crossref","unstructured":"Melekhov, I., Tiulpin, A., Sattler, T., Pollefeys, M., Rahtu, E., Kannala, J.: DGC-Net: dense geometric correspondence network. In: Proceedings of the IEEE Winter Conference on Applications of Computer Vision (WACV) (2019)","DOI":"10.1109\/WACV.2019.00115"},{"key":"5_CR61","doi-asserted-by":"crossref","unstructured":"Mishkin, D., Matas, J., Perdoch, M., Lenc, K.: WxBS: wide baseline stereo generalizations. In: Xie, X., Jones, M.W., Tam, G.K.L. (eds.) BMVC (2015)","DOI":"10.5244\/C.29.12"},{"key":"5_CR62","doi-asserted-by":"crossref","unstructured":"Mishkin, D., Radenovic, F., Matas, J.: Repeatability is not enough: learning affine regions via discriminability. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01240-3_18"},{"issue":"5","key":"5_CR63","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1109\/TRO.2015.2463671","volume":"31","author":"R Mur-Artal","year":"2015","unstructured":"Mur-Artal, R., Montiel, J.M.M., Tardos, J.D.: ORB-SLAM: a versatile and accurate monocular slam system. IEEE Trans. Robot. 31(5), 1147\u20131163 (2015)","journal-title":"IEEE Trans. Robot."},{"key":"5_CR64","doi-asserted-by":"crossref","unstructured":"Na, Y., Kim, W.J., Han, K.B., Ha, S., Yoon, S.E.: Uforecon: generalizable sparse-view surface reconstruction from arbitrary and unfavorable sets. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00487"},{"key":"5_CR65","unstructured":"van\u00a0den Oord, A., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. CoRR abs\/1807.03748 (2018). http:\/\/arxiv.org\/abs\/1807.03748"},{"key":"5_CR66","unstructured":"Oquab, M., et al.: Dinov2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"5_CR67","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1017\/S096249291700006X","volume":"26","author":"O \u00d6zye\u015fil","year":"2017","unstructured":"\u00d6zye\u015fil, O., Voroninski, V., Basri, R., Singer, A.: A survey of structure from motion*. Acta Numer. 26, 305\u2013364 (2017)","journal-title":"Acta Numer."},{"key":"5_CR68","doi-asserted-by":"publisher","first-page":"869","DOI":"10.5194\/isprs-archives-XLII-2-869-2018","volume":"42","author":"M Peppa","year":"2018","unstructured":"Peppa, M., et al.: Archaeological feature detection from archive aerial photography with a SFM-MVS and image enhancement pipeline. Int. Arch. Photogramm. Remote. Sens. Spat. Inf. Sci. 42, 869\u2013875 (2018)","journal-title":"Int. Arch. Photogramm. Remote. Sens. Spat. Inf. Sci."},{"key":"5_CR69","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"5_CR70","doi-asserted-by":"crossref","unstructured":"Ranjan, A., Black, M.J.: Optical flow estimation using a spatial pyramid network. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.291"},{"key":"5_CR71","doi-asserted-by":"crossref","unstructured":"Reizenstein, J., Shapovalov, R., Henzler, P., Sbordone, L., Labatut, P., Novotn\u00fd, D.: Common objects in 3D: large-scale learning and evaluation of real-life 3D category reconstruction. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"5_CR72","doi-asserted-by":"crossref","unstructured":"Revaud, J., Weinzaepfel, P., Harchaoui, Z., Schmid, C.: EpicFlow: edge-preserving interpolation of correspondences for optical flow. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298720"},{"key":"5_CR73","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1007\/s11263-016-0908-3","volume":"120","author":"J Revaud","year":"2016","unstructured":"Revaud, J., Weinzaepfel, P., Harchaoui, Z., Schmid, C.: DeepMatching: hierarchical deformable dense matching. IJCV 120, 300\u2013323 (2016)","journal-title":"IJCV"},{"key":"5_CR74","unstructured":"Revaud, J., Weinzaepfel, P., de\u00a0Souza, C.R., Humenberger, M.: R2D2: repeatable and reliable detector and descriptor. In: NIPS (2019)"},{"key":"5_CR75","doi-asserted-by":"crossref","unstructured":"Rockwell, C., Kulkarni, N., Jin, L., Park, J.J., Johnson, J., Fouhey, D.F.: FAR: flexible, accurate and robust 6DoF relative camera pose estimation (2024)","DOI":"10.1109\/CVPR52733.2024.01877"},{"key":"5_CR76","doi-asserted-by":"crossref","unstructured":"Rublee, E., Rabaud, V., Konolige, K., Bradski, G.R.: ORB: an efficient alternative to SIFT or SURF. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126544"},{"key":"5_CR77","doi-asserted-by":"crossref","unstructured":"Sarlin, P.E., Cadena, C., Siegwart, R., Dymczyk, M.: From coarse to fine: robust hierarchical localization at large scale. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01300"},{"key":"5_CR78","doi-asserted-by":"crossref","unstructured":"Sarlin, P., DeTone, D., Malisiewicz, T., Rabinovich, A.: SuperGlue: learning feature matching with graph neural networks. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"5_CR79","doi-asserted-by":"crossref","unstructured":"Savva, M., et al.: Habitat: a platform for embodied AI research. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00943"},{"key":"5_CR80","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J.L., Hardmeier, H., Sattler, T., Pollefeys, M.: Comparative evaluation of hand-crafted and learned local features. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.736"},{"key":"5_CR81","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J.L., Frahm, J.M.: Structure-from-motion revisited. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"5_CR82","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J.L., Zheng, E., Pollefeys, M., Frahm, J.M.: Pixelwise view selection for unstructured multi-view stereo. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46487-9_31"},{"key":"5_CR83","doi-asserted-by":"crossref","unstructured":"Sethi, I.K., Jain, R.C.: Finding trajectories of feature points in a monocular image sequence. IEEE TPAMI (1987)","DOI":"10.1109\/TPAMI.1987.4767872"},{"key":"5_CR84","doi-asserted-by":"crossref","unstructured":"Shi, X., et al.: Videoflow: exploiting temporal cues for multi-frame optical flow estimation. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01146"},{"key":"5_CR85","doi-asserted-by":"crossref","unstructured":"Shi, X., et al.: Flowformer++: masked cost volume autoencoding for pretraining optical flow estimation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00160"},{"key":"5_CR86","unstructured":"Spencer, J., Russell, C., Hadfield, S., Bowden, R.: Kick back & relax++: scaling beyond ground-truth depth with SlowTV & CribsTV. arXiv Preprint (2024)"},{"key":"5_CR87","doi-asserted-by":"crossref","unstructured":"Sun, J., Shen, Z., Wang, Y., Bao, H., Zhou, X.: LoFTR: detector-free local feature matching with transformers. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00881"},{"key":"5_CR88","doi-asserted-by":"crossref","unstructured":"Sun, P., et al.: Scalability in perception for autonomous driving: waymo open dataset. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"5_CR89","doi-asserted-by":"crossref","unstructured":"Taira, H., et al.: InLoc: indoor visual localization with dense matching and view synthesis. PAMI (2019)","DOI":"10.1109\/CVPR.2018.00752"},{"key":"5_CR90","unstructured":"Tang, S., Zhang, J., Zhu, S., Tan, P.: Quadtree attention for vision transformers. ICLR (2022)"},{"key":"5_CR91","doi-asserted-by":"crossref","unstructured":"Teed, Z., Deng, J.: RAFT: recurrent all-pairs field transforms for optical flow. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58536-5_24"},{"issue":"3","key":"5_CR92","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1145\/504729.504754","volume":"45","author":"S Thrun","year":"2002","unstructured":"Thrun, S.: Probabilistic robotics. Commun. ACM 45(3), 52\u201357 (2002)","journal-title":"Commun. ACM"},{"key":"5_CR93","doi-asserted-by":"crossref","unstructured":"Tian, Y., Yu, X., Fan, B., Wu, F., Heijnen, H., Balntas, V.: Sosnet: second order similarity regularization for local descriptor learning. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01127"},{"key":"5_CR94","doi-asserted-by":"crossref","unstructured":"Toft, C., Turmukhambetov, D., Sattler, T., Kahl, F., Brostow, G.J.: Single-image depth prediction makes feature matching easier. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58517-4_28"},{"key":"5_CR95","doi-asserted-by":"publisher","first-page":"903","DOI":"10.1007\/s00138-011-0346-8","volume":"23","author":"E Tola","year":"2012","unstructured":"Tola, E., Strecha, C., Fua, P.: Efficient large-scale multi-view stereo for ultra high-resolution image sets. Mach. Vis. Appl. 23, 903\u2013920 (2012)","journal-title":"Mach. Vis. Appl."},{"key":"5_CR96","doi-asserted-by":"crossref","unstructured":"Tosi, F., Liao, Y., Schmitt, C., Geiger, A.: SMD-nets: stereo mixture density networks. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00883"},{"key":"5_CR97","doi-asserted-by":"crossref","unstructured":"Truong, P., Danelljan, M., Gool, L.V., Timofte, R.: Learning accurate dense correspondences and when to trust them. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00566"},{"key":"5_CR98","doi-asserted-by":"crossref","unstructured":"Truong, P., Danelljan, M., Timofte, R.: GLU-Net: global-local universal network for dense flow and correspondences. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00629"},{"issue":"8","key":"5_CR99","doi-asserted-by":"publisher","first-page":"10247","DOI":"10.1109\/TPAMI.2023.3249225","volume":"45","author":"P Truong","year":"2023","unstructured":"Truong, P., Danelljan, M., Timofte, R., Gool, L.V.: PDC-net+: enhanced probabilistic dense correspondence network. IEEE TPAMI 45(8), 10247\u201310266 (2023)","journal-title":"IEEE TPAMI"},{"key":"5_CR100","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., et al. (eds.) NeurIPS (2017)"},{"key":"5_CR101","doi-asserted-by":"crossref","unstructured":"Verdie, Y., Yi, K.M., Fua, P., Lepetit, V.: TILDE: a temporally invariant learned detector. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299165"},{"key":"5_CR102","doi-asserted-by":"crossref","unstructured":"Wang, B., et al.: P2-net: joint description and detection of local features for pixel and point matching. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01570"},{"key":"5_CR103","doi-asserted-by":"crossref","unstructured":"Wang, F., Galliani, S., Vogel, C., Speciale, P., Pollefeys, M.: Patchmatchnet: learned multi-view patchmatch stereo. In: CVPR, pp. 14194\u201314203 (2021)","DOI":"10.1109\/CVPR46437.2021.01397"},{"key":"5_CR104","doi-asserted-by":"crossref","unstructured":"Wang, J., Rupprecht, C., Novotny, D.: PoseDiffusion: solving pose estimation via diffusion-aided bundle adjustment (2023)","DOI":"10.1109\/ICCV51070.2023.00896"},{"key":"5_CR105","doi-asserted-by":"crossref","unstructured":"Wang, Q., Zhou, X., Hariharan, B., Snavely, N.: Learning feature descriptors using camera pose supervision. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58452-8_44"},{"key":"5_CR106","doi-asserted-by":"crossref","unstructured":"Wang, S., Leroy, V., Cabon, Y., Chidlovskii, B., Revaud, J.: DUSt3R: geometric 3D vision made easy (2023)","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"5_CR107","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Tartanair: a dataset to push the limits of visual slam (2020)","DOI":"10.1109\/IROS45743.2020.9341801"},{"key":"5_CR108","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Lucas, T., et al.: CroCo v2: improved cross-view completion pre-training for stereo matching and optical flow. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01647"},{"key":"5_CR109","unstructured":"Wu, C.: VisualSFM: A Visual Structure from Motion System (2011). http:\/\/ccwu.me\/vsfm\/"},{"key":"5_CR110","unstructured":"Wu, H., Sankaranarayanan, A.C., Chellappa, R.: CVPR (2007)"},{"key":"5_CR111","doi-asserted-by":"crossref","unstructured":"Xu, Q., Tao, W.: Learning inverse depth regression for multi-view stereo with correlation cost volume. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6939"},{"key":"5_CR112","unstructured":"Yang, G., Malisiewicz, T., Belongie, S.J.: Learning data-adaptive interest points through epipolar adaptation. In: CVPR Workshops (2019)"},{"key":"5_CR113","doi-asserted-by":"crossref","unstructured":"Yang, J., Mao, W., \u00c1lvarez, J.M., Liu, M.: Cost volume pyramid based depth inference for multi-view stereo. In: CVPR, pp. 4876\u20134885 (2020)","DOI":"10.1109\/CVPR42600.2020.00493"},{"key":"5_CR114","doi-asserted-by":"crossref","unstructured":"Yao, Y., Luo, Z., Li, S., Fang, T., Quan, L.: Mvsnet: depth inference for unstructured multi-view stereo. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01237-3_47"},{"key":"5_CR115","doi-asserted-by":"crossref","unstructured":"Yao, Y., et al.: Blendedmvs: a large-scale dataset for generalized multi-view stereo networks. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00186"},{"key":"5_CR116","doi-asserted-by":"crossref","unstructured":"Yao, Y., Jafarian, Y., Park, H.S.: MONET: multiview semi-supervised keypoint detection via epipolar divergence. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00084"},{"key":"5_CR117","doi-asserted-by":"crossref","unstructured":"Yeshwanth, C., Liu, Y.C., Nie\u00dfner, M., Dai, A.: Scannet++: a high-fidelity dataset of 3D indoor scenes. In: Proceedings of the International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"5_CR118","doi-asserted-by":"crossref","unstructured":"Yifan, W., Doersch, C., Arandjelovic, R., Carreira, J., Zisserman, A.: Input-level inductive biases for 3D reconstruction. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, 18\u201324 June 2022 (2022)","DOI":"10.1109\/CVPR52688.2022.00608"},{"key":"5_CR119","unstructured":"Zhang, J.Y., Lin, A., Kumar, M., Yang, T.H., Ramanan, D., Tulsiani, S.: Cameras as rays: pose estimation via ray diffusion. In: International Conference on Learning Representations (ICLR) (2024)"},{"key":"5_CR120","doi-asserted-by":"crossref","unstructured":"Zhang, J.Y., Ramanan, D., Tulsiani, S.: Relpose: predicting probabilistic relative rotation for single objects in the wild. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19821-2_34"},{"key":"5_CR121","doi-asserted-by":"crossref","unstructured":"Zhang, X., Yu, F.X., Karaman, S., Chang, S.: Learning discriminative and transformation covariant local feature detectors. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.523"},{"key":"5_CR122","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Peng, R., Hu, Y., Wang, R.: Geomvsnet: learning multi-view stereo with geometry perception. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02060"},{"key":"5_CR123","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Sattler, T., Scaramuzza, D.: Reference pose generation for long-term visual localization via learned features and view synthesis. In: IJCV (2021)","DOI":"10.1007\/s11263-020-01399-8"},{"key":"5_CR124","doi-asserted-by":"crossref","unstructured":"Zhou, Q., Sattler, T., Leal-Taixe, L.: Patch2pix: epipolar-guided pixel-level correspondences. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00464"},{"key":"5_CR125","doi-asserted-by":"crossref","unstructured":"Zhou, T., Tucker, R., Flynn, J., Fyffe, G., Snavely, N.: Stereo magnification: learning view synthesis using multiplane images. In: SIGGRAPH (2018)","DOI":"10.1145\/3197517.3201323"},{"key":"5_CR126","doi-asserted-by":"crossref","unstructured":"Zhu, S., Liu, X.: Pmatch: paired masked image modeling for dense geometric matching. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02098"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73220-1_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T20:03:29Z","timestamp":1730577809000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73220-1_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9783031732195","9783031732201"],"references-count":126,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73220-1_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}