{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T14:38:43Z","timestamp":1772721523958,"version":"3.50.1"},"reference-count":123,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,5,25]],"date-time":"2022-05-25T00:00:00Z","timestamp":1653436800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,5,25]],"date-time":"2022-05-25T00:00:00Z","timestamp":1653436800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s11263-022-01615-7","type":"journal-article","created":{"date-parts":[[2022,5,25]],"date-time":"2022-05-25T09:04:37Z","timestamp":1653469477000},"page":"1811-1836","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":34,"title":["Investigating the Role of Image Retrieval for Visual Localization"],"prefix":"10.1007","volume":"130","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0600-9164","authenticated-orcid":false,"given":"Martin","family":"Humenberger","sequence":"first","affiliation":[]},{"given":"Yohann","family":"Cabon","sequence":"additional","affiliation":[]},{"given":"No\u00e9","family":"Pion","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4223-3983","authenticated-orcid":false,"given":"Philippe","family":"Weinzaepfel","sequence":"additional","affiliation":[]},{"given":"Donghwan","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Nicolas","family":"Gu\u00e9rin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9760-4553","authenticated-orcid":false,"given":"Torsten","family":"Sattler","sequence":"additional","affiliation":[]},{"given":"Gabriela","family":"Csurka","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,25]]},"reference":[{"key":"1615_CR1","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107, R., Gron\u00e1t, P., Torii, A., Pajdla, T., & Sivic, J. (2016). NetVLAD: CNN architecture for weakly supervised place recognition. In CVPR.","DOI":"10.1109\/CVPR.2016.572"},{"key":"1615_CR2","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107, R., & Zisserman, A. (2012). Three things everyone should know to improve object retrieval. In CVPR.","DOI":"10.1109\/CVPR.2012.6248018"},{"key":"1615_CR3","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107, R., & Zisserman, A. (2013). All about VLAD. In CVPR.","DOI":"10.1109\/CVPR.2013.207"},{"key":"1615_CR4","doi-asserted-by":"crossref","unstructured":"Arandjelovi\u0107, R., & Zisserman, A. (2014) DisLocation: Scalable descriptor distinctiveness for location recognition. In ACCV (pp. 188\u2013204). Springer.","DOI":"10.1007\/978-3-319-16817-3_13"},{"key":"1615_CR5","doi-asserted-by":"crossref","unstructured":"Arth, C., Wagner, D., Klopschitz, M., Irschara, A., & Schmalstieg, D. (2009) Wide area localization on mobile phones. In IEEE International Symposium on Mixed and Augmented Reality.","DOI":"10.1109\/ISMAR.2009.5336494"},{"key":"1615_CR6","doi-asserted-by":"crossref","unstructured":"Avrithis, Y., Kalantidis, Y., Tolias, G., & Spyrou, E. (2010). Retrieving landmark and non-landmark images from community photo collections. In ACMMM.","DOI":"10.1145\/1873951.1873973"},{"key":"1615_CR7","unstructured":"Babenko, A., & Lempitsky, V. (2015). Aggregating deep convolutional features for image retrieval. In ICCV."},{"key":"1615_CR8","doi-asserted-by":"crossref","unstructured":"Babenko, A., Slesarev, A., Chigorin, A., & Lempitsky, V. (2014). Neural codes for image retrieval. In ECCV.","DOI":"10.1007\/978-3-319-10590-1_38"},{"key":"1615_CR9","doi-asserted-by":"crossref","unstructured":"Balntas, V., Li, S., & Prisacariu, V. (2018). RelocNet: Continuous metric learning relocalisation using neural nets. In ECCV.","DOI":"10.1007\/978-3-030-01264-9_46"},{"key":"1615_CR10","doi-asserted-by":"crossref","unstructured":"Brachmann, E., Humenberger, M., Rother, C., & Sattler, T. (2021). On the limits of pseudo ground truth in visual camera re-localisation. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00616"},{"key":"1615_CR11","doi-asserted-by":"crossref","unstructured":"Brachmann, E., & Rother, C. (2018). Learning less is more\u20146D camera localization via 3D surface regression. In CVPR.","DOI":"10.1109\/CVPR.2018.00489"},{"key":"1615_CR12","doi-asserted-by":"crossref","unstructured":"Brachmann, E., & Rother, C. (2019). Expert sample consensus applied to camera re-localization. In ICCV.","DOI":"10.1109\/ICCV.2019.00762"},{"key":"1615_CR13","doi-asserted-by":"crossref","unstructured":"Brahmbhatt, S., Gu, J., Kim, K., Hays, J., & Kautz, J. (2018). Geometry-aware learning of maps for camera localization. In CVPR.","DOI":"10.1109\/CVPR.2018.00277"},{"issue":"3","key":"1615_CR14","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1007\/s10044-017-0611-1","volume":"20","author":"J Brejcha","year":"2017","unstructured":"Brejcha, J., & \u010cad\u00edk, M. (2017). State-of-the-art in visual geo-localization. Pattern Analysis and Applications (PAA), 20(3), 613\u2013637.","journal-title":"Pattern Analysis and Applications (PAA)"},{"key":"1615_CR15","doi-asserted-by":"crossref","unstructured":"Cao, B., Araujo, A., & Sim, J. (2020). Unifying deep local and global features for image search. In ECCV.","DOI":"10.1007\/978-3-030-58565-5_43"},{"key":"1615_CR16","doi-asserted-by":"crossref","unstructured":"Cao, S., & Snavely, N. (2013). Graph-based discriminative learning for location recognition. In CVPR.","DOI":"10.1109\/CVPR.2013.96"},{"key":"1615_CR17","doi-asserted-by":"crossref","unstructured":"Castle, R., Klein, G., & Murray, D. (2008). Video-rate localization in multiple maps for wearable augmented reality. In IEEE international symposium on wearable computers.","DOI":"10.1109\/ISWC.2008.4911577"},{"key":"1615_CR18","unstructured":"Cavallari, T., Bertinetto, L., Mukhoti, J., Torr, P., & Golodetz, S. (2017). Let\u2019s take this online: Adapting scene coordinate regression network predictions for online RGB-D camera relocalisation. In 3DV."},{"issue":"10","key":"1615_CR19","doi-asserted-by":"publisher","first-page":"2465","DOI":"10.1109\/TPAMI.2019.2915068","volume":"42","author":"T Cavallari","year":"2019","unstructured":"Cavallari, T., Golodetz, S., Lord, N., Valentin, J., Prisacariu, V., Di Stefano, L., & Torr, P. (2019). Real-time RGB-D camera pose estimation in novel scenes using a relocalisation cascade. IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 42(10), 2465\u20132477.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)"},{"key":"1615_CR20","doi-asserted-by":"crossref","unstructured":"Chen, D., Baatz, G., K\u00f6ser, K., Tsai, S., Vedantham, R., Pylv\u00e4n\u00e4inen, T., Roimela, K., Chen, X., Bach, J., Pollefeys, M., Girod, B., & Grzeszczuk, R. (2011). City-scale landmark identification on mobile devices. In CVPR.","DOI":"10.1109\/CVPR.2011.5995610"},{"issue":"8","key":"1615_CR21","doi-asserted-by":"publisher","first-page":"1472","DOI":"10.1109\/TPAMI.2007.70787","volume":"30","author":"O Chum","year":"2008","unstructured":"Chum, O., & Matas, J. (2008). Optimal randomized RANSAC. IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 30(8), 1472\u20131482.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)"},{"key":"1615_CR22","doi-asserted-by":"crossref","unstructured":"Crandall, D., Backstrom, L., Huttenlocher, D., & Kleinberg, J. (2009). Mapping the world\u2019s photos. In WWW.","DOI":"10.1145\/1526709.1526812"},{"key":"1615_CR23","unstructured":"Csurka, G., Dance, C., Fan, L., Willamowski, J., & Bray, C. (2004). Visual categorization with bags of keypoints. In ECCV Workshops."},{"key":"1615_CR24","unstructured":"Csurka, G., Dance, C., & Humenberger, M. (2018). From handcrafted to deep local invariant features. arXiv:1807.10254"},{"key":"1615_CR25","doi-asserted-by":"crossref","unstructured":"Cui, Q., Fragoso, V., Sweeney, C., & Sen, P. (2017). GraphMatch: Efficient large-scale graph construction for structure from motion. In 3DV.","DOI":"10.1109\/3DV.2017.00028"},{"key":"1615_CR26","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., & Zafeiriou, S. (2019). ArcFace: Additive angular margin loss for deep face recognition. In CVPR.","DOI":"10.1109\/CVPR.2019.00482"},{"key":"1615_CR27","doi-asserted-by":"crossref","unstructured":"Ding, M., Wang, Z., Sun, J., Shi, J., & Luo, P. (2019). CamNet: Coarse-to-fine retrieval for camera re-localization. In ICCV.","DOI":"10.1109\/ICCV.2019.00296"},{"key":"1615_CR28","doi-asserted-by":"crossref","unstructured":"Dusmanu, M., Rocco, I., Pajdla, T., Pollefeys, M., Sivic, J., Torii, A., & Sattler, T. (2019). D2-Net: A trainable CNN for joint description and detection of local features. In CVPR","DOI":"10.1109\/CVPR.2019.00828"},{"issue":"6","key":"1615_CR29","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/358669.358692","volume":"24","author":"M Fischler","year":"1981","unstructured":"Fischler, M., & Bolles, R. (1981). Random sample consensus: A paradigm for model fitting with applications to image analysis and automated cartography. Communications of the ACM, 24(6), 381\u2013395.","journal-title":"Communications of the ACM"},{"issue":"2","key":"1615_CR30","first-page":"1","volume":"64","author":"E Garcia-Fidalgo","year":"2015","unstructured":"Garcia-Fidalgo, E., & Ortiz, A. (2015). Vision-based topological mapping and localization methods: A survey. Robotics and Autonomous Systems (RAS), 64(2), 1\u201320.","journal-title":"Robotics and Autonomous Systems (RAS)"},{"key":"1615_CR31","doi-asserted-by":"crossref","unstructured":"Germain, H., Bourmaud, G., & Lepetit, V. (2019). Sparse-to-dense hypercolumn matching for long-term visual localization. In 3DV.","DOI":"10.1109\/3DV.2019.00063"},{"key":"1615_CR32","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/s11263-017-1016-8","volume":"124","author":"A Gordo","year":"2017","unstructured":"Gordo, A., Almaz\u00e1n, J., Revaud, J., & Larlus, D. (2017). End-to-end learning of deep visual representations for image retrieval. International Journal of Computer Vision (IJCV), 124, 237\u2013254.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1615_CR33","doi-asserted-by":"crossref","unstructured":"Hausler, S., Garg, S., Xu, M., Milford, M., & Fischer, T. (2021). Patch-NetVLAD: Multi-scale fusion of locally-global descriptors for place recognition. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01392"},{"key":"1615_CR34","doi-asserted-by":"crossref","unstructured":"Hays, J., & Efros, A. (2008). IM2GPS: Estimating geographic information from a single image. In CVPR.","DOI":"10.1109\/CVPR.2008.4587784"},{"key":"1615_CR35","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask R-CNN. In ICCV.","DOI":"10.1109\/ICCV.2017.322"},{"key":"1615_CR36","doi-asserted-by":"crossref","unstructured":"Heinly, J., Sch\u00f6nberger, J., Dunn, E., & Frahm, J. M. (2015). Reconstructing the world in six days as captured by the Yahoo 100 million image dataset. In CVPR.","DOI":"10.1109\/CVPR.2015.7298949"},{"key":"1615_CR37","doi-asserted-by":"crossref","unstructured":"Heng, L., Choi, B., Cui, Z., Geppert, M., Hu, S., Kuan, B., Liu, P., Nguyen, R., Yeo, Y., Geiger, A., Lee, G., Pollefeys, M., & Sattler, T. (2019). Project AutoVision: Localization and 3D scene perception for an autonomous vehicle with a multi-camera system. In ICRA.","DOI":"10.1109\/ICRA.2019.8793949"},{"key":"1615_CR38","unstructured":"Humenberger, M., Cabon, Y., Guerin, N., Morat, J., Revaud, J., Rerole, P., Pion, N., de\u00a0Souza, C., Leroy, V., & Csurka, G. (2020). Robust image retrieval-based visual localization using Kapture. arXiv:2007.13867"},{"key":"1615_CR39","doi-asserted-by":"crossref","unstructured":"Irschara, A., Zach, C., Frahm, J. M., & Bischof, H. (2009). From structure-from-motion point clouds to fast location recognition. In CVPR.","DOI":"10.1109\/CVPR.2009.5206587"},{"key":"1615_CR40","doi-asserted-by":"crossref","unstructured":"J\u00e9gou, H., & Chum, O. (2012). Negative evidences and co-occurrences in image retrieval: The benefit of PCA and whitening. In ECCV.","DOI":"10.1007\/978-3-642-33709-3_55"},{"key":"1615_CR41","doi-asserted-by":"crossref","unstructured":"J\u00e9gou, H., Douze, M., Schmid, C., & P\u00e9rez, P. (2010). Aggregating local descriptors into a compact image representation. In CVPR.","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"1615_CR42","doi-asserted-by":"crossref","unstructured":"Kalantidis, Y., Mellina, C., & Osindero, S. (2016). Cross-dimensional Weighting for aggregated deep convolutional features. In ECCV Workshops.","DOI":"10.1007\/978-3-319-46604-0_48"},{"issue":"9","key":"1615_CR43","first-page":"3121","volume":"74","author":"Y Kalantidis","year":"2011","unstructured":"Kalantidis, Y., Tolias, G., Avrithis, Y., Phinikettos, M., Spyrou, E., Mylonas, P., & Kollias, S. (2011). VIRaL: Visual image retrieval and localization. Multimedia Tools and Applications (MTA), 74(9), 3121\u20133135.","journal-title":"Multimedia Tools and Applications (MTA)"},{"key":"1615_CR44","doi-asserted-by":"crossref","unstructured":"Kendall, A., & Cipolla, R. (2017). Geometric loss functions for camera pose regression with deep learning. In CVPR.","DOI":"10.1109\/CVPR.2017.694"},{"key":"1615_CR45","doi-asserted-by":"crossref","unstructured":"Kendall, A., Grimes, M., & Cipolla, R. (2015). PoseNet: A convolutional network for real-time 6-DOF camera relocalization. In ICCV.","DOI":"10.1109\/ICCV.2015.336"},{"key":"1615_CR46","doi-asserted-by":"crossref","unstructured":"Kim, H., Dunn, E., & Frahm, J. M. (2017). Learned contextual feature reweighting for image geo-localization. In CVPR.","DOI":"10.1109\/CVPR.2017.346"},{"key":"1615_CR47","doi-asserted-by":"crossref","unstructured":"Kneip, L., Scaramuzza, D., & Siegwart, R. (2011). A novel parametrization of the perspective-three-point problem for a direct computation of absolute camera position and orientation. In CVPR.","DOI":"10.1109\/CVPR.2011.5995464"},{"key":"1615_CR48","doi-asserted-by":"crossref","unstructured":"Knopp, J., Sivic, J., & Pajdla, T. (2010). Avoiding confusing features in place recognition. In ECCV.","DOI":"10.1007\/978-3-642-15549-9_54"},{"key":"1615_CR49","doi-asserted-by":"crossref","unstructured":"Kukelova, Z., Bujnak, M., & Pajdla, T. (2013). Real-time solution to the absolute pose problem with unknown radial distortion and focal length. In ICCV.","DOI":"10.1109\/ICCV.2013.350"},{"key":"1615_CR50","doi-asserted-by":"crossref","unstructured":"Larsson, V., Kukelova, Z., & Zheng, Y. (2017). Making minimal solvers for absolute pose estimation compact and robust. In ICCV.","DOI":"10.1109\/ICCV.2017.254"},{"key":"1615_CR51","doi-asserted-by":"crossref","unstructured":"Laskar, Z., Melekhov, I., Kalia, S., & Kannala, J. (2017). Camera relocalization by computing pairwise relative poses using convolutional neural network. In ICCV Workshops.","DOI":"10.1109\/ICCVW.2017.113"},{"key":"1615_CR52","doi-asserted-by":"crossref","unstructured":"Lebeda, K., Matas, J., & Chum, O. (2012). Fixing the locally optimized RANSAC. In BMVC.","DOI":"10.5244\/C.26.95"},{"key":"1615_CR53","doi-asserted-by":"crossref","unstructured":"Lee, D., Ryu, S., Yeon, S., Lee, Y., Kim, D., Han, C., Cabon, Y., Weinzaepfel, P., Guerin, N., Csurka, G., & Humenberger, M. (2021). Large-scale localization datasets in crowded indoor spaces. In CVPR.","DOI":"10.1109\/CVPR46437.2021.00324"},{"key":"1615_CR54","doi-asserted-by":"crossref","unstructured":"Li, X., Wang, S., Zhao, Y., Verbeek, J., & Kannala, J. (2020). Hierarchical scene coordinate classification and regression for visual localization. In CVPR.","DOI":"10.1109\/CVPR42600.2020.01200"},{"key":"1615_CR55","unstructured":"Li, Y., Crandall, D., & Huttenlocher, D. (2009). Landmark classification in large-scale image collections. In ICCV."},{"key":"1615_CR56","doi-asserted-by":"crossref","unstructured":"Li, Y., Snavely, N., & Huttenlocher, D. (2010). Location recognition using prioritized feature matching. In ECCV.","DOI":"10.1007\/978-3-642-15552-9_57"},{"key":"1615_CR57","doi-asserted-by":"crossref","unstructured":"Li, Y., Snavely, N., Huttenlocher, D., & Fua, P. (2012) Worldwide pose estimation using 3D point clouds. In ECCV.","DOI":"10.1007\/978-3-642-33718-5_2"},{"issue":"4\u20135","key":"1615_CR58","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1177\/0278364914561101","volume":"34","author":"H Lim","year":"2015","unstructured":"Lim, H., Sinha, S., Cohen, M., Uyttendaele, M., & Kim, H. (2015). Real-time monocular image-based 6-DoF localization. International Journal of Robotics Research, 34(4\u20135), 476\u2013492.","journal-title":"International Journal of Robotics Research"},{"key":"1615_CR59","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. (2014). Microsoft COCO: Common objects in context. In ECCV.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1615_CR60","doi-asserted-by":"crossref","unstructured":"Liu, L., Li, H., & Dai, Y. (2019). Stochastic attraction-repulsion embedding for large scale image localization. In ICCV.","DOI":"10.1109\/ICCV.2019.00266"},{"key":"1615_CR61","unstructured":"Liu, R., Li, Z., & Jia, J. (2008). Image partial blur detection and classification. In CVPR."},{"issue":"2","key":"1615_CR62","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"D Lowe","year":"2004","unstructured":"Lowe, D. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision (IJCV), 60(2), 91\u2013110.","journal-title":"International Journal of Computer Vision (IJCV)"},{"issue":"1","key":"1615_CR63","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TRO.2015.2496823","volume":"32","author":"S Lowry","year":"2016","unstructured":"Lowry, S., S\u00fcnderhauf, N., Newman, P., Leonard, J., Cox, D., Corke, P., & Milford, M. (2016). Visual place recognition: A survey. IEEE Transactions on Robotics, 32(1), 1\u201319.","journal-title":"IEEE Transactions on Robotics"},{"key":"1615_CR64","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1023\/A:1008854305733","volume":"4","author":"F Lu","year":"1997","unstructured":"Lu, F., & Milios, E. (1997). Globally consistent range scan alignment for environment mapping. Autonomous Robots, 4, 333\u201334.","journal-title":"Autonomous Robots"},{"key":"1615_CR65","doi-asserted-by":"crossref","unstructured":"Lynen, S., Sattler, T., Bosse, M., Hesch, J., Pollefeys, M., & Siegwart, R. (2015). Get out of my Lab: Large-scale, real-time visual-inertial localization. In RSS.","DOI":"10.15607\/RSS.2015.XI.037"},{"issue":"1","key":"1615_CR66","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364916679498","volume":"36","author":"W Maddern","year":"2017","unstructured":"Maddern, W., Pascoe, G., Linegar, C., & Newman, P. (2017). 1 Year, 1000 km: The Oxford RobotCar dataset. International Journal of Robotics Research, 36(1), 3\u201315.","journal-title":"International Journal of Robotics Research"},{"key":"1615_CR67","doi-asserted-by":"crossref","unstructured":"Massiceti, D., Krull, A., Brachmann, E., Rother, C., & Torr, P. (2017). Random forests versus neural networks\u2014What\u2019s best for camera localization? In ICRA.","DOI":"10.1109\/ICRA.2017.7989598"},{"key":"1615_CR68","doi-asserted-by":"crossref","unstructured":"Middelberg, S., Sattler, T., Untzelmann, O., & Kobbelt, L. (2014). Scalable 6-DoF localization on mobile devices. In ECCV.","DOI":"10.1007\/978-3-319-10605-2_18"},{"key":"1615_CR69","doi-asserted-by":"crossref","unstructured":"Myers, J., & Well, A. (2003). Research design and statistical analysis. Lawrence Erlbaum Associates.","DOI":"10.4324\/9781410607034"},{"key":"1615_CR70","doi-asserted-by":"crossref","unstructured":"Noh, H., Araujo, A., Sim, J., Weyand, T., & Han, B. (2017) Large-scale image retrieval with attentive deep local features. In ICCV.","DOI":"10.1109\/ICCV.2017.374"},{"key":"1615_CR71","first-page":"240","volume":"58","author":"K Pearson","year":"1985","unstructured":"Pearson, K. (1985). Notes on regression and inheritance in the case of two parents. Proceedings of the Royal Society of London, 58, 240\u2013242.","journal-title":"Proceedings of the Royal Society of London"},{"key":"1615_CR72","doi-asserted-by":"crossref","unstructured":"Perronnin, F., & Dance, C. (2007). Fisher kernels on visual vocabularies for image categorization. In CVPR.","DOI":"10.1109\/CVPR.2007.383266"},{"key":"1615_CR73","doi-asserted-by":"crossref","unstructured":"Philbin, J., Chum, O., Isard, M., Sivic, J., & Zisserman, A. (2007). Object retrieval with large vocabularies and fast spatial matching. In CVPR.","DOI":"10.1109\/CVPR.2007.383172"},{"key":"1615_CR74","doi-asserted-by":"crossref","unstructured":"Philbin, J., Chum, O., Isard, M., Sivic, J., & Zisserman, A. (2008). Lost in quantization: Improving particular object retrieval in large scale image databases. In CVPR.","DOI":"10.1109\/CVPR.2008.4587635"},{"issue":"2","key":"1615_CR75","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1016\/j.patcog.2017.09.013","volume":"74","author":"N Piasco","year":"2018","unstructured":"Piasco, N., Sidib\u00e9, D., Demonceaux, C., & Gouet-Brunet, V. (2018). A survey on visual-based localization: On the benefit of heterogeneous data. Pattern Recognition, 74(2), 90\u2013109.","journal-title":"Pattern Recognition"},{"key":"1615_CR76","doi-asserted-by":"crossref","unstructured":"Pion, N., Humenberger, M., Csurka\u00a0Khedari, G., Cabon, Y., & Torsten, S. (2020). Benchmarking image retrieval for visual localization. In 3DV.","DOI":"10.1109\/3DV50981.2020.00058"},{"key":"1615_CR77","doi-asserted-by":"crossref","unstructured":"Radenovi\u0107, F., Iscen, A., Tolias, G., & Avrithis Yannis\u00a0Chum, O. (2018). Revisiting Oxford and Paris: Large-scale image retrieval benchmarking. In CVPR.","DOI":"10.1109\/CVPR.2018.00598"},{"issue":"7","key":"1615_CR78","doi-asserted-by":"publisher","first-page":"1655","DOI":"10.1109\/TPAMI.2018.2846566","volume":"41","author":"F Radenovi\u0107","year":"2019","unstructured":"Radenovi\u0107, F., Tolias, G., & Chum, O. (2019). Fine-tuning CNN image retrieval with no human annotation. IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 41(7), 1655\u20131668.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)"},{"issue":"3","key":"1615_CR79","doi-asserted-by":"publisher","first-page":"251","DOI":"10.3169\/mta.4.251","volume":"4","author":"A Razavian","year":"2015","unstructured":"Razavian, A., Sullivan, J., Carlsson, S., & Maki, A. (2015). Visual instance retrieval with deep convolutional networks. ITE Transactions on Media Technology and Applications, 4(3), 251\u2013258.","journal-title":"ITE Transactions on Media Technology and Applications"},{"key":"1615_CR80","doi-asserted-by":"crossref","unstructured":"Revaud, J., Almazan, J., de Rezende, R. S., & de Souza, C. R. (2019a). Learning with average precision: Training image retrieval with a listwise loss. In ICCV.","DOI":"10.1109\/ICCV.2019.00521"},{"key":"1615_CR81","unstructured":"Revaud, J., Weinzaepfel, P., De Souza, C., & Humenberger, M. (2019b). R2D2: Reliable and repeatable detectors and descriptors. In NeurIPS."},{"key":"1615_CR82","unstructured":"Revaud, J., Weinzaepfel, P., De Souza, C., Pion, N., Csurka, G., Cabon, Y., & Humenberger, M. (2019c). R2D2: Reliable and repeatable detectors and descriptors for joint sparse keypoint detection and local feature extraction. arXiv:1906.06195"},{"key":"1615_CR83","doi-asserted-by":"crossref","unstructured":"Sarlin, P. E., Cadena, C., Siegwart, R., & Dymczyk, M. (2019). From coarse to fine: Robust hierarchical localization at large scale. In CVPR.","DOI":"10.1109\/CVPR.2019.01300"},{"key":"1615_CR84","doi-asserted-by":"crossref","unstructured":"Sarlin, P. E., Unagar, A., Larsson, M., Germain, H., Toft, C., Larsson, V., Pollefeys, M., Lepetit, V., Hammarstrand, L., Kahl, F., & Sattler, T. (2021). Back to the feature: Learning robust camera localization from pixels to pose. In CVPR.","DOI":"10.1109\/CVPR46437.2021.00326"},{"key":"1615_CR85","doi-asserted-by":"crossref","unstructured":"Sattler, T., Havlena, M., Radenovi\u0107, F., Schindler, K., & Pollefeys, M. (2015). Hyperpoints and fine vocabularies for large-scale location recognition. In ICCV.","DOI":"10.1109\/ICCV.2015.243"},{"key":"1615_CR86","doi-asserted-by":"crossref","unstructured":"Sattler, T., Havlena, M., Schindler, K., & Pollefey, M. (2016). Large-scale location recognition and the geometric burstiness problem. In CVPR.","DOI":"10.1109\/CVPR.2016.175"},{"issue":"9","key":"1615_CR87","doi-asserted-by":"publisher","first-page":"1744","DOI":"10.1109\/TPAMI.2016.2611662","volume":"39","author":"T Sattler","year":"2017","unstructured":"Sattler, T., Leibe, B., & Kobbelt, L. (2017). Efficient & effective prioritized matching for large-scale image-based localization. IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 39(9), 1744\u20131756.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)"},{"key":"1615_CR88","doi-asserted-by":"crossref","unstructured":"Sattler, T., Maddern, W., Toft, C., Torii, A., Hammarstrand, L., Stenborg, E., Safari, D., Okutomi, M., Pollefeys, M., Sivic, J., Kahl, F., & Pajdla, T. (2018). Benchmarking 6DoF outdoor visual localization in changing conditions. In CVPR.","DOI":"10.1109\/CVPR.2018.00897"},{"key":"1615_CR89","doi-asserted-by":"crossref","unstructured":"Sattler, T., Weyand, T., Leibe, B., & Kobbelt, L. (2012). Image retrieval for image-based localization revisited. In BMVC.","DOI":"10.5244\/C.26.76"},{"key":"1615_CR90","doi-asserted-by":"crossref","unstructured":"Sattler, T., Zhou, Q., Pollefeys, M., & Leal-Taix\u00e9, L. (2019). Understanding the limitations of CNN-based absolute camera pose regression. In CVPR.","DOI":"10.1109\/CVPR.2019.00342"},{"key":"1615_CR91","doi-asserted-by":"crossref","unstructured":"Schindler, G., Brown, M., & Szeliski, R. (2007). City-scale location recognition. In CVPR.","DOI":"10.1109\/CVPR.2007.383150"},{"key":"1615_CR92","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J., & Frahm, J. M. (2016). Structure-from-motion revisited. In CVPR.","DOI":"10.1109\/CVPR.2016.445"},{"key":"1615_CR93","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J., Hardmeier, H., Sattler, T., & Pollefeys, M. (2017). Comparative evaluation of hand-crafted and learned local features. In CVPR.","DOI":"10.1109\/CVPR.2017.736"},{"key":"1615_CR94","unstructured":"Se, S., Lowe, D., & Little, J. (2002). Global localization using distinctive visual features. In IROS."},{"key":"1615_CR95","doi-asserted-by":"crossref","unstructured":"Shotton, J., Glocker, B., Zach, C., Izadi, S., Criminisi, A., & Fitzgibbon, A. (2013). Scene coordinate regression forests for camera relocalization in RGB-D images. In CVPR.","DOI":"10.1109\/CVPR.2013.377"},{"key":"1615_CR96","doi-asserted-by":"crossref","unstructured":"Sivic, J., & Zisserman, A. (2003). Video Google: A text retrieval approach to object matching in videos. In ICCV.","DOI":"10.1109\/ICCV.2003.1238663"},{"issue":"2","key":"1615_CR97","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1007\/s11263-007-0107-3","volume":"80","author":"N Snavely","year":"2008","unstructured":"Snavely, N., Seitz, S., & Szeliski, R. (2008). Modeling the world from internet photo collections. International Journal of Computer Vision (IJCV), 80(2), 189\u2013210.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1615_CR98","doi-asserted-by":"crossref","unstructured":"Sun, X., Xie, Y., Luo, P., & Wang, L. (2017). A dataset for benchmarking image-based localization. In CVPR.","DOI":"10.1109\/CVPR.2017.598"},{"key":"1615_CR99","doi-asserted-by":"crossref","unstructured":"Taira, H., Okutomi, M., Sattler, T., Cimpoi, M., Pollefeys, M., Sivic, J., Pajdla, T., & Akihiko, T. (2018). InLoc: Indoor visual localization with dense matching and view synthesis. In CVPR.","DOI":"10.1109\/CVPR.2018.00752"},{"key":"1615_CR100","doi-asserted-by":"crossref","unstructured":"Taira, H., Okutomi, M., Sattler, T., Cimpoi, M., Pollefeys, M., Sivic, J., Pajdla, T., & Akihiko, T. (2019a). InLoc: Indoor visual localization with dense matching and view synthesis. IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI). (Early Acces).","DOI":"10.1109\/CVPR.2018.00752"},{"key":"1615_CR101","doi-asserted-by":"crossref","unstructured":"Taira, H., Rocco, I., Sedlar, J., Okutomi, M., Sivic, J., Pajdla, T., Sattler, T., & Torii, A. (2019b). Is This the Right Place? Geometric-semantic pose verification for indoor visual localization. In ICCV.","DOI":"10.1109\/ICCV.2019.00447"},{"key":"1615_CR102","doi-asserted-by":"crossref","unstructured":"Tang, S., Tang, C., Huang, R., Zhu, S., & Tan, P. (2021). Learning camera localization via dense scene matching. In CVPR.","DOI":"10.1109\/CVPR46437.2021.00187"},{"issue":"10","key":"1615_CR103","first-page":"3466","volume":"47","author":"G Tolias","year":"2014","unstructured":"Tolias, G., & J\u00e9gou, H. (2014). Visual query expansion with or without geometry: Refining local descriptors by feature aggregation. Computer Vision and Image Understanding (CVIU), 47(10), 3466\u20133476.","journal-title":"Computer Vision and Image Understanding (CVIU)"},{"key":"1615_CR104","unstructured":"Tolias, G., Sicre, R., & J\u00e9gou, H. (2016). Particular object retrieval with integral maxpooling of CNN activations. In ICLR."},{"key":"1615_CR105","doi-asserted-by":"crossref","unstructured":"Torii, A., Arandjelovi\u0107, R., Sivic, J., Okutomi, M., & Pajdla, T. (2015a). 24\/7 Place recognition by view synthesis. In CVPR.","DOI":"10.1109\/CVPR.2015.7298790"},{"issue":"2","key":"1615_CR106","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/TPAMI.2017.2667665","volume":"40","author":"A Torii","year":"2018","unstructured":"Torii, A., Arandjelovi\u0107, R., Sivic, J., Okutomi, M., & Pajdla, T. (2018). 24\/7 Place recognition by view synthesis. IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 40(2), 257\u2013271.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)"},{"key":"1615_CR107","doi-asserted-by":"crossref","unstructured":"Torii, A., Sivic, J., Okutomi, M., & Pajdla, T. (2015b). Visual place recognition with repetitive structures. IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 37(11), 2346\u20132359.","DOI":"10.1109\/TPAMI.2015.2409868"},{"key":"1615_CR108","doi-asserted-by":"crossref","unstructured":"Torii, A., Sivic, J., & Pajdla, T. (2011). Visual localization by linear combination of image descriptors. In ICCV Workshops.","DOI":"10.1109\/ICCVW.2011.6130230"},{"issue":"3","key":"1615_CR109","doi-asserted-by":"publisher","first-page":"814","DOI":"10.1109\/TPAMI.2019.2941876","volume":"43","author":"A Torii","year":"2021","unstructured":"Torii, A., Taira, H., Sivic, J., Pollefeys, M., Okutomi, M., Pajdla, T., & Sattler, T. (2021). Are large-scale 3D models really necessary for accurate visual localization? IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 43(3), 814\u2013829.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)"},{"issue":"4","key":"1615_CR110","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1109\/TVCG.2014.27","volume":"20","author":"J Ventura","year":"2014","unstructured":"Ventura, J., Arth, C., Reitmayr, G., & Schmalstieg, D. (2014). Global localization from monocular SLAM on a mobile phone. IEEE Transactions on Visualization and Computer Graphics, 20(4), 531\u2013539.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"1615_CR111","doi-asserted-by":"crossref","unstructured":"Vo, N., Jacobs, N., & Hays, J. (2017). Revisiting IM2GPS in the deep learning era. In ICCV.","DOI":"10.1109\/ICCV.2017.286"},{"key":"1615_CR112","doi-asserted-by":"crossref","unstructured":"Walch, F., Hazirbas, C., Leal-Taix\u00e9, L., Sattler, T., Hilsenbeck, S., & Cremers, D. (2017). Image-based localization using LSTMs for structured feature correlation. In ICCV.","DOI":"10.1109\/ICCV.2017.75"},{"key":"1615_CR113","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Csurka, G., Cabon, Y., & Humenberger, M. (2019). Visual localization by learning objects-of-interest dense match regression. In CVPR.","DOI":"10.1109\/CVPR.2019.00578"},{"key":"1615_CR114","doi-asserted-by":"crossref","unstructured":"Weyand, T., Araujo, A., Cao, B., & Sim, J. (2020). Google Landmarks dataset v2\u2013 A large-scale benchmark for instance-level recognition and retrieval. In CVPR.","DOI":"10.1109\/CVPR42600.2020.00265"},{"key":"1615_CR115","doi-asserted-by":"crossref","unstructured":"Wijmans, E., & Furukawa, Y. (2017). Exploiting 2D floorplan for building-scale panorama RGB-D alignment. In CVPR.","DOI":"10.1109\/CVPR.2017.156"},{"key":"1615_CR116","doi-asserted-by":"crossref","unstructured":"Yang, L., Bai, Z., Tang, C., Li, H., Furukawa, Y., & Tan, P. (2019). SANet: Scene agnostic network for camera localization. In ICCV.","DOI":"10.1109\/ICCV.2019.00013"},{"key":"1615_CR117","doi-asserted-by":"crossref","unstructured":"Zamir, A., Hakeem, A., Gool, L., Shah, M., & Richard, S. (2016). Large-scale visual geo-localization. In Advances in computer vision and pattern recognition. Springer.","DOI":"10.1007\/978-3-319-25781-5"},{"key":"1615_CR118","doi-asserted-by":"crossref","unstructured":"Zamir, A. R., & Shah, M. (2010). Accurate image localization based on google maps street view. In ECCV.","DOI":"10.1007\/978-3-642-15561-1_19"},{"key":"1615_CR119","doi-asserted-by":"crossref","unstructured":"Zhang, W., & Kosecka, J. (2006). Image based localization in urban environments. In International symposium on 3D data processing, visualization, and transmission.","DOI":"10.1109\/3DPVT.2006.80"},{"key":"1615_CR120","doi-asserted-by":"publisher","first-page":"821","DOI":"10.1007\/s11263-020-01399-8","volume":"129","author":"Z Zhang","year":"2021","unstructured":"Zhang, Z., Sattler, T., & Scaramuzza, D. (2021). Reference pose generation for long-term visual localization via learned features and view synthesis. International Journal of Computer Vision (IJCV), 129, 821\u2013844.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1615_CR121","doi-asserted-by":"crossref","unstructured":"Zheng, E., & Wu, C. (2015). Structure from motion using structure-less resection. In ICCV.","DOI":"10.1109\/ICCV.2015.240"},{"key":"1615_CR122","unstructured":"Zheng, L., Zhao, Y., Wang, S., Wang, J., & Tian, Q. (2016). Good practice in CNN feature transfer. arXiv:1604.00133"},{"key":"1615_CR123","doi-asserted-by":"crossref","unstructured":"Zhou, Q., Sattler, T., Pollefeys, M., & Leal-Taix\u00e9, L. (2020). To learn or not to learn: Visual localization from essential matrices. In ICRA.","DOI":"10.1109\/ICRA40945.2020.9196607"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01615-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-022-01615-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01615-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,13]],"date-time":"2023-01-13T05:12:56Z","timestamp":1673586776000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-022-01615-7"}},"subtitle":["An Exhaustive Benchmark"],"short-title":[],"issued":{"date-parts":[[2022,5,25]]},"references-count":123,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["1615"],"URL":"https:\/\/doi.org\/10.1007\/s11263-022-01615-7","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,5,25]]},"assertion":[{"value":"28 February 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 March 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 May 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}