{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T06:36:17Z","timestamp":1780986977738,"version":"3.54.1"},"reference-count":119,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2020,10,7]],"date-time":"2020-10-07T00:00:00Z","timestamp":1602028800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,10,7]],"date-time":"2020-10-07T00:00:00Z","timestamp":1602028800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2021,2]]},"DOI":"10.1007\/s11263-020-01385-0","type":"journal-article","created":{"date-parts":[[2020,10,7]],"date-time":"2020-10-07T19:02:29Z","timestamp":1602097349000},"page":"517-547","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":317,"title":["Image Matching Across Wide Baselines: From Paper to Practice"],"prefix":"10.1007","volume":"129","author":[{"given":"Yuhe","family":"Jin","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8205-6718","authenticated-orcid":false,"given":"Dmytro","family":"Mishkin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anastasiia","family":"Mishchuk","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0863-4844","authenticated-orcid":false,"given":"Jiri","family":"Matas","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pascal","family":"Fua","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kwang Moo","family":"Yi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1425-7881","authenticated-orcid":false,"given":"Eduard","family":"Trulls","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,10,7]]},"reference":[{"key":"1385_CR1","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1007\/s11263-011-0473-8","volume":"97","author":"H Aanaes","year":"2012","unstructured":"Aanaes, H., Dahl, A. L., & Steenstrup-Pedersen, K. (2012). Interesting interest points. International Journal of Computer Vision, 97, 18\u201335.","journal-title":"International Journal of Computer Vision"},{"key":"1385_CR2","unstructured":"Aanaes, H., & Kahl, F. (2002). Estimation of deformable structure and motion. In Vision and modelling of dynamic scenes workshop."},{"key":"1385_CR3","unstructured":"Agarwal, S., Snavely, N., Simon, I., Seitz, S., & Szeliski, R. (2009). Building Rome in one day. In International conference on computer vision."},{"key":"1385_CR4","doi-asserted-by":"crossref","unstructured":"Alahi, A., Ortiz, R., & Vandergheynst, P. (2012). FREAK: Fast retina keypoint. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2012.6247715"},{"key":"1385_CR5","doi-asserted-by":"crossref","unstructured":"Alcantarilla, P.\u00a0F., Nuevo, J., & Bartoli, A. (2013). Fast explicit diffusion for accelerated features in nonlinear scale spaces. In British machine vision conference.","DOI":"10.5244\/C.27.13"},{"key":"1385_CR6","doi-asserted-by":"publisher","first-page":"3807","DOI":"10.1016\/j.imavis.2019.08.011","volume":"97","author":"J Aldana-Iuit","year":"2019","unstructured":"Aldana-Iuit, J., Mishkin, D., Chum, O., & Matas, J. (2019). Saddle: Fast and repeatable features with good coverage. Image and Vision Computing, 97, 3807.","journal-title":"Image and Vision Computing"},{"key":"1385_CR7","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Gronat, P., Torii, A., Pajdla, T., & Sivic, J. (2016). NetVLAD: CNN architecture for weakly supervised place recognition. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2016.572"},{"key":"1385_CR8","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R. & Zisserman, A. (2012). Three things everyone should know to improve object retrieval. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2012.6248018"},{"key":"1385_CR9","unstructured":"Badino, H., Huber, D., & Kanade, T. (2011). The CMU visual localization data set. http:\/\/3dvis.ri.cmu.edu\/data-sets\/localization."},{"key":"1385_CR10","unstructured":"Balntas, V. (2018). SILDa: A multi-task dataset for evaluating visual localization. https:\/\/research.scape.io\/silda\/."},{"key":"1385_CR11","doi-asserted-by":"crossref","unstructured":"Balntas, V., Lenc, K., Vedaldi, A., & Mikolajczyk, K. (2017). HPatches: A benchmark and evaluation of handcrafted and learned local descriptors. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.410"},{"key":"1385_CR12","doi-asserted-by":"crossref","unstructured":"Balntas, V., Li, S., & Prisacariu, V. (September 2018). RelocNet: continuous metric learning relocalisation using neural nets. In European conference on computer vision.","DOI":"10.1007\/978-3-030-01264-9_46"},{"key":"1385_CR13","doi-asserted-by":"crossref","unstructured":"Balntas, V., Riba, E., Ponsa, D., & Mikolajczyk, K. (2016). Learning local feature descriptors with triplets and shallow convolutional neural networks. In British machine vision conference.","DOI":"10.5244\/C.30.119"},{"key":"1385_CR14","doi-asserted-by":"crossref","unstructured":"Barath, D. & Matas, J. (June 2018). Graph-cut RANSAC. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00704"},{"key":"1385_CR15","doi-asserted-by":"crossref","unstructured":"Barath, D., Matas, J., & Noskova, J. (2019). MAGSAC: Marginalizing sample consensus. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.01044"},{"key":"1385_CR16","unstructured":"Barroso-Laguna, A., Riba, E., Ponsa, D., & Mikolajczyk, K. (2019). Key.Net: Keypoint detection by handcrafted and learned CNN filters. In International conference on computer vision."},{"key":"1385_CR17","doi-asserted-by":"crossref","unstructured":"Baumberg, A. (2000). Reliable feature matching across widely separated views. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2000.855899"},{"key":"1385_CR18","doi-asserted-by":"crossref","unstructured":"Bay, H., Tuytelaars, T., & Van\u00a0Gool, L. (2006). SURF: Speeded up robust features. In European conference on computer vision.","DOI":"10.1007\/11744023_32"},{"key":"1385_CR19","unstructured":"Beaudet, P. R. (Nov. 1978). Rotationally invariant image operators. In Proceedings of the 4th international joint conference on pattern recognition (pp. 579\u2013583). Kyoto."},{"key":"1385_CR20","first-page":"1","volume":"2020","author":"F Bellavia","year":"2020","unstructured":"Bellavia, F., & Colombo, C. (2020). Is there anything new to say about sift matching? International Journal of Computer Vision, 2020, 1\u201320.","journal-title":"International Journal of Computer Vision"},{"key":"1385_CR21","unstructured":"Bian, J.-W., Wu, Y.-H., Zhao, J., Liu, Y., Zhang, L., Cheng, M.-M., & Reid, I. (2019). An evaluation of feature matchers for fundamental matrix estimation. In British machine vision conference."},{"key":"1385_CR22","doi-asserted-by":"crossref","unstructured":"Brachmann, E., & Rother, C. (2019). Neural-guided RANSAC: learning where to sample model hypotheses. In International conference on computer vision.","DOI":"10.1109\/ICCV.2019.00442"},{"key":"1385_CR23","first-page":"122","volume":"120","author":"G Bradski","year":"2000","unstructured":"Bradski, G. (2000). The OpenCV library. Dr. Dobb\u2019s Journal of Software Tools, 120, 122\u2013125.","journal-title":"Dr. Dobb\u2019s Journal of Software Tools"},{"key":"1385_CR24","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1109\/TPAMI.2010.54","volume":"33","author":"M Brown","year":"2011","unstructured":"Brown, M., Hua, G., & Winder, S. (2011). Discriminative learning of local image descriptors. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33, 43\u201357.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1385_CR25","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/s11263-006-0002-3","volume":"74","author":"M Brown","year":"2007","unstructured":"Brown, M., & Lowe, D. (2007). Automatic panoramic image stitching using invariant features. International Journal of Computer Vision, 74, 59\u201373.","journal-title":"International Journal of Computer Vision"},{"key":"1385_CR26","doi-asserted-by":"crossref","unstructured":"Bui, M., Baur, C., Navab, N., Ilic, S., & Albarqouni, S. (October 2019). Adversarial networks for camera pose regression and refinement. In International conference on computer vision.","DOI":"10.1109\/ICCVW.2019.00470"},{"key":"1385_CR27","unstructured":"Chum, O., & Matas, J. (June 2005). Matching with PROSAC\u2014progressive sample consensus. In Conference on computer vision and pattern recognition."},{"key":"1385_CR28","doi-asserted-by":"crossref","unstructured":"Chum, O., Matas, J., & Kittler, J. (2003). Locally optimized RANSAC. In Pattern recognition.","DOI":"10.1007\/978-3-540-45243-0_31"},{"key":"1385_CR29","doi-asserted-by":"crossref","unstructured":"Chum, O., Werner, T., & Matas, J. (2005). Two-view geometry estimation unaffected by a dominant plane. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2005.354"},{"key":"1385_CR30","doi-asserted-by":"crossref","unstructured":"Cui, H., Gao, X., Shen, S., & Hu, Z. (July 2017). Hsfm: Hybrid structure-from-motion. In CVPR.","DOI":"10.1109\/CVPR.2017.257"},{"key":"1385_CR31","doi-asserted-by":"crossref","unstructured":"Dang, Z., Yi, K.\u00a0M., Hu, Y., Wang, F., Fua, P., & Salzmann, M. (2018). Eigendecomposition-free training of deep networks with zero eigenvalue-based losses. In European conference on computer vision.","DOI":"10.1007\/978-3-030-01228-1_47"},{"key":"1385_CR32","unstructured":"Detone, D., Malisiewicz, T., & Rabinovich, A. (2017). Toward geometric deep SLAM. Preprint arXiv:1707.07410."},{"key":"1385_CR33","doi-asserted-by":"crossref","unstructured":"Detone, D., Malisiewicz, T., & Rabinovich, A. (2018). Superpoint: Self-supervised interest point detection and description. CVPR workshop on deep learning for visual SLAM.","DOI":"10.1109\/CVPRW.2018.00060"},{"key":"1385_CR34","doi-asserted-by":"crossref","unstructured":"Dong, J., Karianakis, N., Davis, D., Hernandez, J., Balzer, J., & Soatto, S. (June 2015). Multi-view feature engineering and learning. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7298945"},{"key":"1385_CR35","doi-asserted-by":"crossref","unstructured":"Dong, J. & Soatto, S. (2015). Domain-size pooling in local descriptors: DSP-SIFT. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7299145"},{"key":"1385_CR36","doi-asserted-by":"crossref","unstructured":"Dusmanu, M., Rocco, I., Pajdla, T., Pollefeys, M., Sivic, J., Torii, A., & Sattler, T. (2019). D2-Net: A trainable CNN for joint detection and description of local features. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00828"},{"key":"1385_CR37","doi-asserted-by":"crossref","unstructured":"Ebel, P., Mishchuk, A., Yi, K.\u00a0M., Fua, P., & Trulls, E. (2019). Beyond Cartesian representations for local descriptors. In International conference on computer vision.","DOI":"10.1109\/ICCV.2019.00034"},{"issue":"6","key":"1385_CR38","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/358669.358692","volume":"24","author":"M Fischler","year":"1981","unstructured":"Fischler, M., & Bolles, R. (1981). Random sample consensus: A paradigm for model fitting with applications to image analysis and automated cartography. Communications of the ACM, 24(6), 381\u2013395.","journal-title":"Communications of the ACM"},{"key":"1385_CR39","doi-asserted-by":"crossref","unstructured":"Gay, P., Bansal, V., Rubino, C., & Bue, A.\u00a0D. (2017). Probabilistic structure from motion with objects (PSfMO). In International conference on computer vision.","DOI":"10.1109\/ICCV.2017.334"},{"key":"1385_CR40","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., & Urtasun, R. (2012). Are we ready for autonomous driving? The KITTI vision benchmark suite. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"6","key":"1385_CR41","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1109\/34.601246","volume":"19","author":"R Hartley","year":"1997","unstructured":"Hartley, R. (1997). In defense of the eight-point algorithm. IEEE Transactions on Pattern Analysis and Machine Intelligence, 19(6), 580\u2013593.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1385_CR42","volume-title":"Multiple view geometry in computer vision","author":"R Hartley","year":"2000","unstructured":"Hartley, R., & Zisserman, A. (2000). Multiple view geometry in computer vision. Cambridge: Cambridge University Press."},{"issue":"10","key":"1385_CR43","doi-asserted-by":"publisher","first-page":"1036","DOI":"10.1109\/34.329005","volume":"16","author":"RI Hartley","year":"1994","unstructured":"Hartley, R. I. (1994). Projective reconstruction and invariants from multiple images. IEEE Transactions on Pattern Analysis and Machine Intelligence, 16(10), 1036\u20131041.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1385_CR44","doi-asserted-by":"crossref","unstructured":"He, K., Lu, Y., & Sclaroff, S. (2018). Local descriptors optimized for average precision. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00069"},{"key":"1385_CR45","doi-asserted-by":"crossref","unstructured":"Heinly, J., Schoenberger, J., Dunn, E., & Frahm, J.-M. (2015). Reconstructing the world in six days. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7298949"},{"key":"1385_CR46","doi-asserted-by":"crossref","unstructured":"Jacobs, N., Roman, N., & Pless, R. (2007). Consistent temporal variations in many outdoor scenes. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2007.383258"},{"key":"1385_CR47","doi-asserted-by":"crossref","unstructured":"Kendall, A., Grimes, M., & Cipolla, R. (2015). Posenet: A convolutional network for real-time 6-DOF camera relocalization. In International conference on computer vision.","DOI":"10.1109\/ICCV.2015.336"},{"key":"1385_CR48","unstructured":"Krishna\u00a0Murthy, J., Iyer, G., & Paull, L. (2019). gradSLAM: Dense SLAM meets automatic differentiation."},{"key":"1385_CR49","unstructured":"Lenc, K., Gulshan, V., & Vedaldi, A. (2011). VLBenchmarks. http:\/\/www.vlfeat.org\/benchmarks\/."},{"key":"1385_CR50","doi-asserted-by":"crossref","unstructured":"Leutenegger, S., Chli, M., & Siegwart, R.\u00a0Y. (2011). Brisk: Binary robust invariant scalable keypoints. In International conference on computer vision.","DOI":"10.1109\/ICCV.2011.6126542"},{"key":"1385_CR51","doi-asserted-by":"crossref","unstructured":"Li, Z., & Snavely, N. (2018). MegaDepth: Learning single-view depth prediction from internet photos. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00218"},{"issue":"2","key":"1385_CR52","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"20","author":"DG Lowe","year":"2004","unstructured":"Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 20(2), 91\u2013110.","journal-title":"International Journal of Computer Vision"},{"key":"1385_CR53","doi-asserted-by":"crossref","unstructured":"Luo, Z., Shen, T., Zhou, L., Zhang, J., Yao, Y., Li, S., Fang, T., & Quan, L. (2019). ContextDesc: Local descriptor augmentation with cross-modality context. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00263"},{"key":"1385_CR54","doi-asserted-by":"crossref","unstructured":"Luo, Z., Shen, T., Zhou, L., Zhu, S., Zhang, R., Yao, Y., Fang, T., & Quan, L. (2018). Geodesc: Learning local descriptors by integrating geometry constraints. In European conference on computer vision.","DOI":"10.1007\/978-3-030-01240-3_11"},{"key":"1385_CR55","unstructured":"Lynen, S., Zeisl, B., Aiger, D., Bosse, M., Hesch, J., Pollefeys, M., Siegwart, R., & Sattler, T. (2019). Large-scale, real-time visual-inertial localization revisited. Preprint."},{"issue":"1","key":"1385_CR56","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364916679498","volume":"36","author":"W Maddern","year":"2017","unstructured":"Maddern, W., Pascoe, G., Linegar, C., & Newman, P. (2017). 1 year, 1000 km: The Oxford RobotCar dataset. International Journal of Robotics Research, 36(1), 3\u201315.","journal-title":"International Journal of Robotics Research"},{"issue":"10","key":"1385_CR57","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1016\/j.imavis.2004.02.006","volume":"22","author":"J Matas","year":"2004","unstructured":"Matas, J., Chum, O., Urban, M., & Pajdla, T. (2004). Robust wide-baseline stereo from maximally stable extremal regions. Image and Vision Computing, 22(10), 761\u2013767.","journal-title":"Image and Vision Computing"},{"issue":"10","key":"1385_CR58","doi-asserted-by":"publisher","first-page":"1615","DOI":"10.1109\/TPAMI.2005.188","volume":"27","author":"K Mikolajczyk","year":"2004","unstructured":"Mikolajczyk, K., & Schmid, C. (2004). A performance evaluation of local descriptors. IEEE Transactions on Pattern Analysis and Machine Intelligence, 27(10), 1615\u20131630.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1385_CR59","doi-asserted-by":"crossref","unstructured":"Mikolajczyk, K., Schmid, C., & Zisserman, A. (2004). Human detection based on a probabilistic assembly of robust part detectors. In European conference on computer vision.","DOI":"10.1007\/978-3-540-24670-1_6"},{"key":"1385_CR60","unstructured":"Mishchuk, A., Mishkin, D., Radenovic, F., & Matas, J. (2017). Working hard to know your neighbor\u2019s margins: Local descriptor learning loss. In Advances in neural information processing systems."},{"key":"1385_CR61","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/j.cviu.2015.08.005","volume":"141","author":"D Mishkin","year":"2015","unstructured":"Mishkin, D., Matas, J., & Perdoch, M. (2015). MODS: Fast and robust method for two-view matching. Computer Vision and Image Understanding, 141, 81\u201393.","journal-title":"Computer Vision and Image Understanding"},{"key":"1385_CR62","doi-asserted-by":"crossref","unstructured":"Mishkin, D., Radenovic, F., & Matas, J. (2018). Repeatability is not enough: Learning affine regions via discriminability. In European conference on computer vision.","DOI":"10.1007\/978-3-030-01240-3_18"},{"key":"1385_CR63","unstructured":"Muja, M. & Lowe, D.\u00a0G. (2009). Fast approximate nearest neighbors with automatic algorithm configuration. In International conference on computer vision."},{"key":"1385_CR64","doi-asserted-by":"crossref","unstructured":"Mukundan, A., Tolias, G., & Chum, O. (2019). Explicit spatial encoding for deep local descriptors. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00962"},{"issue":"5","key":"1385_CR65","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1109\/TRO.2015.2463671","volume":"31","author":"R Mur-Artal","year":"2015","unstructured":"Mur-Artal, R., Montiel, J., & Tard\u00f3s, J. (2015). ORB-SLAM: A versatile and accurate monocular SLAM system. IEEE Transactions on Robotics, 31(5), 1147\u20131163.","journal-title":"IEEE Transactions on Robotics"},{"key":"1385_CR66","unstructured":"Nister, D. (June 2003). An efficient solution to the five-point relative pose problem. In Conference on computer vision and pattern recognition."},{"key":"1385_CR67","doi-asserted-by":"crossref","unstructured":"Noh, H., Araujo, A., Sim, J., & nd\u00a0Bohyung\u00a0Han, T.\u00a0W. (2017). Large-scale image retrieval with attentive deep local features. In International conference on computer vision.","DOI":"10.1109\/ICCV.2017.374"},{"key":"1385_CR68","unstructured":"Ono, Y., Trulls, E., Fua, P., & Yi, K.\u00a0M. (2018). LF-Net: Learning local features from images. In Advances in neural information processing systems."},{"key":"1385_CR69","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., et al. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12, 2825\u20132830.","journal-title":"Journal of Machine Learning Research"},{"key":"1385_CR70","doi-asserted-by":"crossref","unstructured":"Pizer, S.\u00a0M., Amburn, E.\u00a0P., Austin, J.\u00a0D., Cromartie, R., Geselowitz, A., Greer, T., ter Haar\u00a0Romeny, B., Zimmerman, J.\u00a0B., & Zuiderveld, K. (1987). Adaptive histogram equalization and its variations. In Computer vision, graphics, and image processing.","DOI":"10.1016\/S0734-189X(87)80186-X"},{"key":"1385_CR71","doi-asserted-by":"crossref","unstructured":"Pritchett, P., & Zisserman, A. (1998). Wide baseline stereo matching. In ICCV (pp. 754\u2013760).","DOI":"10.1109\/ICCV.1998.710802"},{"key":"1385_CR72","unstructured":"Pultar, M., Mishkin, D., & Matas, J. (2019). Leveraging outdoor webcams for local descriptor learning. In Computer vision winter workshop."},{"key":"1385_CR73","unstructured":"Qi, C., Su, H., Mo, K., & Guibas, L. (2017). Pointnet: Deep learning on point sets for 3D classification and segmentation. In Conference on computer vision and pattern recognition."},{"key":"1385_CR74","doi-asserted-by":"crossref","unstructured":"Radenovic, F., Tolias, G., & Chum, O. (2016). CNN image retrieval learns from BoW: Unsupervised fine-tuning with hard examples. In European conference on computer vision.","DOI":"10.1007\/978-3-319-46448-0_1"},{"key":"1385_CR75","doi-asserted-by":"crossref","unstructured":"Ranftl, R. & Koltun, V. (2018). Deep fundamental matrix estimation. In European conference on computer vision.","DOI":"10.1007\/978-3-030-01246-5_18"},{"key":"1385_CR76","unstructured":"Revaud, J., Weinzaepfel, P., De Souza, C., Pion, N., Csurka, G., Cabon, Y., & Humenberger, M. (2019). R2D2: Repeatable and reliable detector and descriptor. Preprint."},{"key":"1385_CR77","unstructured":"Revaud, J., Weinzaepfel, P., de\u00a0Souza, C.\u00a0R., Pion, N., Csurka, G., Cabon, Y., & Humenberger, M. (2019). R2D2: Repeatable and reliable detector and descriptor. In Advances in neural information processing systems."},{"key":"1385_CR78","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1109\/TPAMI.2008.275","volume":"32","author":"E Rosten","year":"2010","unstructured":"Rosten, E., Porter, R., & Drummond, T. (2010). Faster and better: A machine learning approach to corner detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, 32, 105\u2013119.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1385_CR79","doi-asserted-by":"crossref","unstructured":"Rublee, E., Rabaud, V., Konolidge, K., & Bradski, G. (2011). ORB: An efficient alternative to SIFT or SURF. In International conference on computer vision.","DOI":"10.1109\/ICCV.2011.6126544"},{"key":"1385_CR80","doi-asserted-by":"crossref","unstructured":"Sarlin, P., DeTone, D., Malisiewicz, T., & Rabinovich, A. (2020). Superglue: Learning feature matching with graph neural networks. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"1385_CR81","doi-asserted-by":"crossref","unstructured":"Sattler, T., Leibe, B., & Kobbelt, L. (2012). Improving image-based localization by active correspondence search. In European conference on computer vision.","DOI":"10.1007\/978-3-642-33718-5_54"},{"key":"1385_CR82","doi-asserted-by":"crossref","unstructured":"Sattler, T., Maddern, W., Toft, C., Torii, A., Hammarstrand, L., Stenborg, E., Safari, D., Okutomi, M., Pollefeys, M., Sivic, J., Kahl, F., & Pajdla, T. (2018). Benchmarking 6DOF outdoor visual localization in changing conditions. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00897"},{"key":"1385_CR83","doi-asserted-by":"crossref","unstructured":"Sattler, T., Weyand, T., Leibe, B., & Kobbelt, L. (2012). Image retrieval for image-based localization revisited. In British machine vision conference.","DOI":"10.5244\/C.26.76"},{"key":"1385_CR84","doi-asserted-by":"crossref","unstructured":"Sattler, T., Zhou, Q., Pollefeys, M., & Leal-Taixe, L. (2019). Understanding the limitations of CNN-based absolute camera pose regression. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00342"},{"key":"1385_CR85","doi-asserted-by":"crossref","unstructured":"Savinov, N., Seki, A., Ladicky, L., Sattler, T., & Pollefeys, M. (2017). Quad-networks: Unsupervised learning to rank for interest point detection. Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.418"},{"key":"1385_CR86","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J., & Frahm, J. (2016). Structure-from-motion revisited. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2016.445"},{"key":"1385_CR87","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J., Hardmeier, H., Sattler, T., & Pollefeys, M. (2017). Comparative evaluation of hand-crafted and learned local features. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.736"},{"key":"1385_CR88","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J., Zheng, E., Pollefeys, M., & Frahm, J. (2016). Pixelwise view selection for unstructured multi-view stereo. In European conference on computer vision.","DOI":"10.1007\/978-3-319-46487-9_31"},{"key":"1385_CR89","unstructured":"Shi, Y., Zhu, J., Fang, Y., Lien, K., & Gu, J. (2019). Self-supervised learning of depth and ego-motion with differentiable bundle adjustment. Preprint."},{"key":"1385_CR90","doi-asserted-by":"crossref","unstructured":"Simo-serra, E., Trulls, E., Ferraz, L., Kokkinos, I., Fua, P., & Moreno-Noguer, F. (2015). Discriminative learning of deep convolutional feature point descriptors. In International conference on computer vision.","DOI":"10.1109\/ICCV.2015.22"},{"key":"1385_CR91","doi-asserted-by":"crossref","unstructured":"Strecha, C., Hansen, W., Van\u00a0Gool, L., Fua, P., & Thoennessen, U. (2008). On benchmarking camera calibration and multi-view stereo for high resolution imagery. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2008.4587706"},{"key":"1385_CR92","doi-asserted-by":"crossref","unstructured":"Sturm, J., Engelhard, N., Endres, F., Burgard, W., & Cremers, D. (2012). A benchmark for the evaluation of RGB-D SLAM systems. In International conference on intelligent robots and systems.","DOI":"10.1109\/IROS.2012.6385773"},{"key":"1385_CR93","doi-asserted-by":"crossref","unstructured":"Sun, W., Jiang, W., Trulls, E., Tagliasacchi, A., & Yi, K.\u00a0M. (2020). ACNe: Attentive context normalization for robust permutation-equivariant learning. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.01130"},{"key":"1385_CR94","first-page":"1744","volume":"39","author":"H Taira","year":"2019","unstructured":"Taira, H., Okutomi, M., Sattler, T., Cimpoi, M., Pollefeys, M., Sivic, J., et al. (2019). InLoc: indoor visual localization with dense matching and view synthesis. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39, 1744\u20131756.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1385_CR95","unstructured":"Tang, C., & Tan, P. (2019). Ba-Net: dense bundle adjustment network. In International conference on learning representations."},{"key":"1385_CR96","doi-asserted-by":"crossref","unstructured":"Tateno, K., Tombari, F., Laina, I., & Navab, N. (July 2017). CNN-SLAM: Real-time dense monocular slam with learned depth prediction. In CVPR.","DOI":"10.1109\/CVPR.2017.695"},{"key":"1385_CR97","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1145\/2812802","volume":"59","author":"B Thomee","year":"2016","unstructured":"Thomee, B., Shamma, D., Friedland, G., Elizalde, B., Ni, K., Poland, D., et al. (2016). YFCC100M: the new data in multimedia research. Communications of the ACM, 59, 64\u201373.","journal-title":"Communications of the ACM"},{"key":"1385_CR98","doi-asserted-by":"crossref","unstructured":"Tian, Y., Fan, B., & Wu, F. (2017). L2-Net: Deep learning of discriminative patch descriptor in Euclidean space. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.649"},{"key":"1385_CR99","doi-asserted-by":"crossref","unstructured":"Tian, Y., Yu, X., Fan, B., Wu, F., Heijnen, H., & Balntas, V. (2019). SOSNet: Second order similarity regularization for local descriptor learning. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.01127"},{"issue":"3","key":"1385_CR100","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/s11263-015-0810-4","volume":"116","author":"G Tolias","year":"2016","unstructured":"Tolias, G., Avrithis, Y., & J\u00e9gou, H. (2016). Image search with selective match kernels: Aggregation across single and multiple images. IJCV, 116(3), 247\u2013261.","journal-title":"IJCV"},{"key":"1385_CR101","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1006\/cviu.1999.0832","volume":"78","author":"P Torr","year":"2000","unstructured":"Torr, P., & Zisserman, A. (2000). MLESAC: A new robust estimator with application to estimating image geometry. Computer Vision and Image Understanding, 78, 138\u2013156.","journal-title":"Computer Vision and Image Understanding"},{"key":"1385_CR102","unstructured":"Triggs, B., Mclauchlan, P., Hartley, R., & Fitzgibbon, A. (2000). Bundle adjustment\u2014A modern synthesis. In Vision algorithms: Theory and practice (pp. 298\u2013372)."},{"key":"1385_CR103","doi-asserted-by":"crossref","unstructured":"Vedaldi, A., & Fulkerson, B. (2010). Vlfeat: An open and portable library of computer vision algorithms. In Proceedings of the 18th ACM international conference on multimedia, MM\u201910 (pp. 1469\u20131472).","DOI":"10.1145\/1873951.1874249"},{"key":"1385_CR104","doi-asserted-by":"crossref","unstructured":"Verdie, Y., Yi, K.\u00a0M., Fua, P., & Lepetit, V. (2015). TILDE: A temporally invariant learned detector. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7299165"},{"key":"1385_CR105","unstructured":"Vijayanarasimhan, S., Ricco, S., Schmid, C., Sukthankar, R., & Fragkiadaki, K. (2017). SFM-Net: Learning of structure and motion from video. Preprint."},{"key":"1385_CR106","doi-asserted-by":"crossref","unstructured":"Wei, X., Zhang, Y., Gong, Y., & Zheng, N. (2018). Kernelized subspace pooling for deep local descriptors. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00200"},{"key":"1385_CR107","doi-asserted-by":"crossref","unstructured":"Wei, X., Zhang, Y., Li, Z., Fu, Y., & Xue, X. (2020). DeepSFM: Structure from motion via deep bundle adjustment. In European conference on computer vision.","DOI":"10.1007\/978-3-030-58452-8_14"},{"key":"1385_CR108","doi-asserted-by":"crossref","unstructured":"Wu, C. (2013). Towards linear-time incremental structure from motion. In 3DV.","DOI":"10.1109\/3DV.2013.25"},{"key":"1385_CR109","doi-asserted-by":"crossref","unstructured":"Yi, K.\u00a0M., Trulls, E., Lepetit, V., & Fua, P. (2016). LIFT: Learned invariant feature transform. In European conference on computer vision.","DOI":"10.1007\/978-3-319-46466-4_28"},{"key":"1385_CR110","doi-asserted-by":"crossref","unstructured":"Yi, K.\u00a0M., Trulls, E., Ono, Y., Lepetit, V., Salzmann, M., & Fua, P. (2018). Learning to find good correspondences. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00282"},{"key":"1385_CR111","unstructured":"Yoo, A.\u00a0B., Jette, M.\u00a0A., & Grondona, M. (2003). SLURM: Simple Linux utility for resource management. In Workshop on job scheduling strategies for parallel processing (pp. 44\u201360). Berlin: Springer."},{"key":"1385_CR112","doi-asserted-by":"crossref","unstructured":"Zagoruyko, S., & Komodakis, N. (2015). Learning to compare image patches via convolutional neural networks. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7299064"},{"key":"1385_CR113","doi-asserted-by":"crossref","unstructured":"Zhang, J., Sun, D., Luo, Z., Yao, A., Zhou, L., Shen, T., et al. (2019). Learning two-view correspondences and geometry using order-aware network. International conference on computer vision.","DOI":"10.1109\/ICCV.2019.00594"},{"key":"1385_CR114","doi-asserted-by":"crossref","unstructured":"Zhang, J., Sun, D., Luo, Z., Yao, A., Zhou, L., Shen, T., Chen, Y., Quan, L., & Liao, H. (2019). Learning two-view correspondences and geometry using order-aware network. In ICCV.","DOI":"10.1109\/ICCV.2019.00594"},{"key":"1385_CR115","doi-asserted-by":"crossref","unstructured":"Zhang, X., Yu, F.\u00a0X., Karaman, S., & Chang, S.-F. (July 2017). Learning discriminative and transformation covariant local feature detectors. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.523"},{"key":"1385_CR116","doi-asserted-by":"crossref","unstructured":"Zhao, C., Cao, Z., Li, C., Li, X., & Yang, J. (2019). NM-Net: Mining reliable neighbors for robust feature correspondences. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00030"},{"key":"1385_CR117","doi-asserted-by":"crossref","unstructured":"Zhou, Q., Sattler, T., Pollefeys, M., & Leal-Taixe, L. (2020). To learn or not to learn: Visual localization from essential matrices. In ICRA.","DOI":"10.1109\/ICRA40945.2020.9196607"},{"key":"1385_CR118","doi-asserted-by":"crossref","unstructured":"Zhu, S., Zhang, R., Zhou, L., Shen, T., Fang, T., Tan, P., & Quan, L. (June 2018). Very large-scale global SFM by distributed motion averaging. In Conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00480"},{"key":"1385_CR119","doi-asserted-by":"crossref","unstructured":"Zitnick, C., & Ramnath, K. (2011). Edge foci interest points. In International conference on computer vision.","DOI":"10.1109\/ICCV.2011.6126263"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01385-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-020-01385-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01385-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,22]],"date-time":"2022-11-22T12:54:47Z","timestamp":1669121687000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-020-01385-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,7]]},"references-count":119,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,2]]}},"alternative-id":["1385"],"URL":"https:\/\/doi.org\/10.1007\/s11263-020-01385-0","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,10,7]]},"assertion":[{"value":"8 May 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 September 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}