{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T16:49:24Z","timestamp":1764175764146,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031730207"},{"type":"electronic","value":"9783031730214"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73021-4_3","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T09:14:02Z","timestamp":1732094042000},"page":"36-53","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Benchmarking the\u00a0Robustness of\u00a0Cross-View Geo-Localization Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9510-3203","authenticated-orcid":false,"given":"Qingwang","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3475-6186","authenticated-orcid":false,"given":"Yingying","family":"Zhu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Gronat, P., Torii, A., Pajdla, T., Sivic, J.: NetVLAD: CNN architecture for weakly supervised place recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5297\u20135307 (2016)","DOI":"10.1109\/CVPR.2016.572"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Chaabane, M., Gueguen, L., Trabelsi, A., Beveridge, R., O\u2019Hara, S.: End-to-end learning improves static object geo-localization from video. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2063\u20132072 (2021)","DOI":"10.1109\/WACV48630.2021.00211"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Dong, Y., et al.: Benchmarking robustness of 3D object detection to common corruptions in autonomous driving. arXiv preprint arXiv:2303.11040 (2023)","DOI":"10.1109\/CVPR52729.2023.00105"},{"key":"3_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"3_CR6","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vision 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Friedland, G., Vinyals, O., Darrell, T.: Multimodal location estimation. In: Proceedings of the 18th ACM International Conference on Multimedia, pp. 1245\u20131252 (2010)","DOI":"10.1145\/1873951.1874197"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Fu, X., Huang, J., Zeng, D., Huang, Y., Ding, X., Paisley, J.: Removing rain from single images via a deep detail network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3855\u20133863 (2017)","DOI":"10.1109\/CVPR.2017.186"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Gatys, L.A., Ecker, A.S., Bethge, M.: Image style transfer using convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2414\u20132423 (2016)","DOI":"10.1109\/CVPR.2016.265"},{"key":"3_CR10","unstructured":"Geirhos, R., Rubisch, P., Michaelis, C., Bethge, M., Wichmann, F.A., Brendel, W.: ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. arXiv preprint arXiv:1811.12231 (2018)"},{"key":"3_CR11","unstructured":"Geirhos, R., Rubisch, P., Michaelis, C., Bethge, M., Wichmann, F., Brendel, W.: ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. In: International Conference on Learning Representations, International Conference on Learning Representations (2018)"},{"key":"3_CR12","unstructured":"Geirhos, R., Temme, C.R., Rauber, J., Sch\u00fctt, H.H., Bethge, M., Wichmann, F.A.: Generalisation in humans and deep neural networks. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"3_CR14","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.imavis.2017.07.003","volume":"68","author":"C H\u00e4ne","year":"2017","unstructured":"H\u00e4ne, C., et al.: 3D visual perception for self-driving cars using a multi-camera system: calibration, mapping, localization, and obstacle detection. Image Vis. Comput. 68, 14\u201327 (2017)","journal-title":"Image Vis. Comput."},{"issue":"12","key":"3_CR15","first-page":"2341","volume":"33","author":"K He","year":"2010","unstructured":"He, K., Sun, J., Tang, X.: Single image haze removal using dark channel prior. IEEE Trans. Pattern Anal. Mach. Intell. 33(12), 2341\u20132353 (2010)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3_CR16","unstructured":"Hendrycks, D., Dietterich, T.: Benchmarking neural network robustness to common corruptions and perturbations. arXiv preprint arXiv:1903.12261 (2019)"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Hu, S., Feng, M., Nguyen, R.M., Lee, G.H.: CVM-net: cross-view matching network for image-based ground-to-aerial geo-localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7258\u20137267 (2018)","DOI":"10.1109\/CVPR.2018.00758"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Kamann, C., Rother, C.: Benchmarking the robustness of semantic segmentation models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8828\u20138838 (2020)","DOI":"10.1109\/CVPR42600.2020.00885"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Lentsch, T., Xia, Z., Caesar, H., Kooij, J.F.: SliceMatch: geometry-guided aggregation for cross-view pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17225\u201317234 (2023)","DOI":"10.1109\/CVPR52729.2023.01652"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Li, Y., Tan, R.T., Guo, X., Lu, J., Brown, M.S.: Rain streak removal using layer priors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2736\u20132744 (2016)","DOI":"10.1109\/CVPR.2016.299"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Liu, L., Li, H.: Lending orientation to neural networks for cross-view geo-localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5624\u20135633 (2019)","DOI":"10.1109\/CVPR.2019.00577"},{"issue":"6","key":"3_CR22","doi-asserted-by":"publisher","first-page":"3064","DOI":"10.1109\/TIP.2018.2806202","volume":"27","author":"YF Liu","year":"2018","unstructured":"Liu, Y.F., Jaw, D.W., Huang, S.C., Hwang, J.N.: DesnowNet: context-aware deep network for snow removal. IEEE Trans. Image Process. 27(6), 3064\u20133073 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"McManus, C., Churchill, W., Maddern, W., Stewart, A.D., Newman, P.: Shady dealings: robust, long-term visual localisation using illumination invariance. In: 2014 IEEE International Conference on Robotics and Automation (ICRA), pp. 901\u2013906. IEEE (2014)","DOI":"10.1109\/ICRA.2014.6906961"},{"key":"3_CR25","unstructured":"Michaelis, C., et al.: Benchmarking robustness in object detection: autonomous driving when winter is coming. arXiv preprint arXiv:1907.07484 (2019)"},{"key":"3_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1007\/978-3-319-10605-2_18","volume-title":"Computer Vision \u2013 ECCV 2014","author":"S Middelberg","year":"2014","unstructured":"Middelberg, S., Sattler, T., Untzelmann, O., Kobbelt, L.: Scalable 6-DOF localization on mobile devices. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part II. LNCS, vol. 8690, pp. 268\u2013283. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10605-2_18"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Mithun, N.C., et al.: Cross-view visual geo-localization for outdoor augmented reality. In: 2023 IEEE Conference Virtual Reality and 3D User Interfaces (VR), pp. 493\u2013502. IEEE (2023)","DOI":"10.1109\/VR55154.2023.00064"},{"issue":"3","key":"3_CR28","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1016\/s0734-189x(87)80186-x","volume":"39","author":"SM Pizer","year":"1987","unstructured":"Pizer, S.M., et al.: Adaptive histogram equalization and its variations. Comput. Vision Graph. Image Process. 39(3), 355\u2013368 (1987). https:\/\/doi.org\/10.1016\/s0734-189x(87)80186-x","journal-title":"Comput. Vision Graph. Image Process."},{"key":"3_CR29","doi-asserted-by":"crossref","unstructured":"Regmi, K., Borji, A.: Cross-view image synthesis using conditional GANs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3501\u20133510 (2018)","DOI":"10.1109\/CVPR.2018.00369"},{"issue":"12","key":"3_CR30","doi-asserted-by":"publisher","first-page":"10009","DOI":"10.1109\/TPAMI.2022.3140750","volume":"44","author":"Y Shi","year":"2022","unstructured":"Shi, Y., Campbell, D., Yu, X., Li, H.: Geometry-guided street-view panorama synthesis from satellite imagery. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 10009\u201310022 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3_CR31","doi-asserted-by":"crossref","unstructured":"Shi, Y., Li, H.: Beyond cross-view image retrieval: Highly accurate vehicle localization using satellite image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17010\u201317020 (2022)","DOI":"10.1109\/CVPR52688.2022.01650"},{"key":"3_CR32","unstructured":"Shi, Y., Liu, L., Yu, X., Li, H.: Spatial-aware feature aggregation for image based cross-view geo-localization. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Shi, Y., Yu, X., Campbell, D., Li, H.: Where am i looking at? Joint location and orientation estimation by cross-view matching. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4064\u20134072 (2020)","DOI":"10.1109\/CVPR42600.2020.00412"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Shi, Y., Yu, X., Liu, L., Zhang, T., Li, H.: Optimal feature transport for cross-view image geo-localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 11990\u201311997 (2020)","DOI":"10.1609\/aaai.v34i07.6875"},{"key":"3_CR35","doi-asserted-by":"crossref","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition (2015)","DOI":"10.1109\/ICCV.2015.314"},{"key":"3_CR36","doi-asserted-by":"crossref","unstructured":"Tang, H., Xu, D., Yan, Y., Torr, P.H., Sebe, N.: Local class-specific and global image-level generative adversarial networks for semantic-guided scene generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7870\u20137879 (2020)","DOI":"10.1109\/CVPR42600.2020.00789"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Toker, A., Zhou, Q., Maximov, M., Leal-Taix\u00e9, L.: Coming down to earth: satellite-to-street view synthesis for geo-localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6488\u20136497 (2021)","DOI":"10.1109\/CVPR46437.2021.00642"},{"key":"3_CR38","unstructured":"Vasiljevic, I., Chakrabarti, A., Shakhnarovich, G.: Examining the impact of blur on recognition by convolutional networks. arXiv preprint arXiv:1611.05760 (2016)"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Workman, S., Souvenir, R., Jacobs, N.: Wide-area image geolocalization with aerial reference imagery. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3961\u20133969 (2015)","DOI":"10.1109\/ICCV.2015.451"},{"key":"3_CR40","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1007\/978-3-031-19842-7_6","volume-title":"Computer Vision \u2013 ECCV 2022","author":"Z Xia","year":"2022","unstructured":"Xia, Z., Booij, O., Manfredi, M., Kooij, J.F.: Visual cross-view metric localization with dense uncertainty estimates. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13699, pp. 90\u2013106. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19842-7_6"},{"key":"3_CR41","unstructured":"Yang, H., Lu, X., Zhu, Y.: Cross-view geo-localization with layer-to-layer transformer. In: Advances in Neural Information Processing Systems, vol. 34, pp. 29009\u201329020 (2021)"},{"key":"3_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, X., Li, X., Sultani, W., Zhou, Y., Wshah, S.: Cross-view geo-localization via learning disentangled geometric layout correspondence. arXiv preprint arXiv:2212.04074 (2022)","DOI":"10.1609\/aaai.v37i3.25457"},{"key":"3_CR43","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2019","unstructured":"Zhou, B., et al.: Semantic understanding of scenes through the ADE20K dataset. Int. J. Comput. Vision 127, 302\u2013321 (2019)","journal-title":"Int. J. Comput. Vision"},{"key":"3_CR44","doi-asserted-by":"crossref","unstructured":"Zhu, S., Shah, M., Chen, C.: TransGeo: transformer is all you need for cross-view image geo-localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1162\u20131171 (2022)","DOI":"10.1109\/CVPR52688.2022.00123"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73021-4_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T16:16:47Z","timestamp":1733069807000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73021-4_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031730207","9783031730214"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73021-4_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}