{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T15:42:21Z","timestamp":1779291741706,"version":"3.51.4"},"publisher-location":"Cham","reference-count":53,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729881","type":"print"},{"value":"9783031729898","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72989-8_10","type":"book-chapter","created":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T17:02:04Z","timestamp":1729875724000},"page":"173-190","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Contrastive Ground-Level Image and\u00a0Remote Sensing Pre-training Improves Representation Learning for\u00a0Natural World Imagery"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2892-706X","authenticated-orcid":false,"given":"Andy V.","family":"Huynh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2496-8035","authenticated-orcid":false,"given":"Lauren E.","family":"Gillespie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jael","family":"Lopez-Saucedo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Claire","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rohan","family":"Sikand","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5711-0700","authenticated-orcid":false,"given":"Mois\u00e9s","family":"Exp\u00f3sito-Alonso","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,26]]},"reference":[{"key":"10_CR1","unstructured":"www.inaturalist.org"},{"key":"10_CR2","unstructured":"US Department of Agriculture: USDA national agricultural statistics service cropland data layer (2016). https:\/\/croplandcros.scinet.usda.gov\/. Accessed 16 Oct 2022"},{"key":"10_CR3","unstructured":"Ayush, K., et al.: Geography-aware self-supervised learning. CoRR abs\/2011.09980 (2020). https:\/\/arxiv.org\/abs\/2011.09980"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Bastani, F., Wolters, P., Gupta, R., Ferdinando, J., Kembhavi, A.: SatlasPretrain: a large-scale dataset for remote sensing image understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16772\u201316782, October 2023","DOI":"10.1109\/ICCV51070.2023.01538"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Beery, S., et al.: The auto arborist dataset: a large-scale benchmark for multiview urban forest monitoring under domain shift. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21294\u201321307 (2022)","DOI":"10.1109\/CVPR52688.2022.02061"},{"key":"10_CR6","unstructured":"Botella, C., et al.: Overview of GeoLifeCLEF 2023: species composition prediction with high spatial resolution at continental scale using remote sensing. Working Notes of CLEF (2023)"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Cai, S., Guo, Y., Khan, S., Hu, J., Wen, G.: Ground-to-aerial image geo-localization with a hard exemplar reweighting triplet loss. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00848"},{"key":"10_CR8","unstructured":"Cepeda, V.V., Nayak, G.K., Shah, M.: GeoCLIP: clip-inspired alignment between locations and images for effective worldwide geo-localization. arXiv preprint arXiv:2309.16020 (2023)"},{"key":"10_CR9","unstructured":"Chen, X., Fan, H., Girshick, R., He, K.: Improved baselines with momentum contrastive learning (2020)"},{"key":"10_CR10","doi-asserted-by":"publisher","unstructured":"Cherti, M., et al.: Reproducible scaling laws for contrastive language-image learning, December 2022. https:\/\/doi.org\/10.48550\/arXiv.2212.07143 [cs]","DOI":"10.48550\/arXiv.2212.07143"},{"key":"10_CR11","unstructured":"Cole, E., et al.: The GeoLifeCLEF 2020 Dataset (2020). http:\/\/arxiv.org\/abs\/2004.04192"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Cole, E., Yang, X., Wilber, K., Mac\u00a0Aodha, O., Belongie, S.: When does contrastive visual representation learning work? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14755\u201314764, June 2022","DOI":"10.1109\/CVPR52688.2022.01434"},{"key":"10_CR13","unstructured":"Cong, Y., et al.: SatMAE: pre-training transformers for temporal and multi-spectral satellite imagery. In: Advances in Neural Information Processing Systems, vol. 35, pp. 197\u2013211 (2022)"},{"key":"10_CR14","unstructured":"Deneu, B., Servajean, M., Bonnet, P., Munoz, F., Joly, A.: Participation of LIRMM\/Inria to the GeoLifeCLEF 2020 challenge, November 2020. https:\/\/hal.inria.fr\/hal-02989084"},{"issue":"11","key":"10_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1126\/sciadv.aaz0414","volume":"5","author":"BJ Enquist","year":"2019","unstructured":"Enquist, B.J., et al.: The commonness of rarity: global and future distribution of rarity across land plants. Sci. Adv. 5(11), 1\u201314 (2019). https:\/\/doi.org\/10.1126\/sciadv.aaz0414","journal-title":"Sci. Adv."},{"key":"10_CR16","unstructured":"ESRI: World continents base map (2023). https:\/\/hub.arcgis.com\/datasets\/esri::world-continents\/explore. Accessed 11 Nov 2023"},{"key":"10_CR17","doi-asserted-by":"publisher","unstructured":"Fick, S.E., Hijmans, R.J.: WorldClim 2: new 1-km spatial resolution climate surfaces for global land areas. Int. J. Climatol. 37(12), 4302\u20134315 (2017). https:\/\/doi.org\/10.1002\/joc.5086. https:\/\/rmets.onlinelibrary.wiley.com\/doi\/abs\/10.1002\/joc.5086. _eprint: https:\/\/rmets.onlinelibrary.wiley.com\/doi\/pdf\/10.1002\/joc.5086","DOI":"10.1002\/joc.5086"},{"key":"10_CR18","unstructured":"Garcin, C., et al.: Pl@ntNet-300K: a plant image dataset with high label ambiguity and a long-tailed distribution. In: Vanschoren, J., Yeung, S. (eds.) Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks, vol.\u00a01. Curran (2021). https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/7e7757b1e12abcb736ab9a754ffb617a-Paper-round2.pdf"},{"key":"10_CR19","doi-asserted-by":"publisher","unstructured":"Gillespie, L.E., Ruffley, M., Exposito-Alonso, M.: Deep learning models map rapid plant species changes from citizen science and remote sensing data. Proc. Nat. Acad. Sci. 121(37), e2318296121 (2024). https:\/\/doi.org\/10.1073\/pnas.2318296121","DOI":"10.1073\/pnas.2318296121"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Haas, L., Skreta, M., Alberti, S., Finn, C.: PIGEON: predicting image geolocations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12893\u201312902, June 2024","DOI":"10.1109\/CVPR52733.2024.01225"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. arXiv:2111.06377 (2021)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"10_CR22","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. CoRR abs\/1512.03385 (2015). http:\/\/arxiv.org\/abs\/1512.03385"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Van\u00a0Horn, G., Cole, E., Beery, S., Wilber, K., Belongie, S., Mac\u00a0Aodha, O.: Benchmarking representation learning for natural world image collections. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12884\u201312893, June 2021","DOI":"10.1109\/CVPR46437.2021.01269"},{"key":"10_CR24","unstructured":"Horn, G.V., Perona, P.: The devil is in the tails: fine-grained classification in the wild (2017)"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Hu, S., Feng, M., Nguyen, R.M., Lee, G.H.: CVM-Net: cross-view matching network for image-based ground-to-aerial geo-localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7258\u20137267 (2018)","DOI":"10.1109\/CVPR.2018.00758"},{"key":"10_CR26","doi-asserted-by":"crossref","unstructured":"Jean, N., Wang, S., Samar, A., Azzari, G., Lobell, D., Ermon, S.: Tile2Vec: unsupervised representation learning for spatially distributed data. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 3967\u20133974 (2019)","DOI":"10.1609\/aaai.v33i01.33013967"},{"issue":"3","key":"10_CR27","doi-asserted-by":"publisher","first-page":"2598","DOI":"10.1109\/TGRS.2020.3007029","volume":"59","author":"J Kang","year":"2021","unstructured":"Kang, J., Fernandez-Beltran, R., Duan, P., Liu, S., Plaza, A.J.: Deep unsupervised embedding for remotely sensed images based on spatially augmented momentum contrast. IEEE Trans. Geosci. Remote Sens. 59(3), 2598\u20132610 (2021). https:\/\/doi.org\/10.1109\/TGRS.2020.3007029","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10_CR28","doi-asserted-by":"publisher","unstructured":"Lacoste, A., et al.: GEO-bench: toward foundation models for earth monitoring, June 2023. https:\/\/doi.org\/10.48550\/arXiv.2306.03831. http:\/\/arxiv.org\/abs\/2306.03831 [cs]","DOI":"10.48550\/arXiv.2306.03831"},{"key":"10_CR29","first-page":"1","volume":"60","author":"W Li","year":"2021","unstructured":"Li, W., Chen, K., Chen, H., Shi, Z.: Geographical knowledge-driven representation learning for remote sensing images. IEEE Trans. Geosci. Remote Sens. 60, 1\u201316 (2021)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10_CR30","doi-asserted-by":"crossref","unstructured":"Liu, L., Li, H.: Lending orientation to neural networks for cross-view geo-localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00577"},{"key":"10_CR31","unstructured":"Mai, G., Lao, N., He, Y., Song, J., Ermon, S.: CSP: self-supervised contrastive spatial pre-training for geospatial-visual representations (2023)"},{"key":"10_CR32","doi-asserted-by":"crossref","unstructured":"Mall, U., Hariharan, B., Bala, K.: Change-aware sampling and contrastive learning for satellite images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5261\u20135270, June 2023","DOI":"10.1109\/CVPR52729.2023.00509"},{"key":"10_CR33","unstructured":"Mall, U., Phoo, C.P., Liu, M.K., Vondrick, C., Hariharan, B., Bala, K.: Remote sensing vision-language foundation models without annotations via ground remote alignment. In: ICLR (2024)"},{"key":"10_CR34","doi-asserted-by":"crossref","unstructured":"Ma\u00f1as, O., Lacoste, A., Gir\u00f3-i Nieto, X., Vazquez, D., Rodr\u00edguez, P.: Seasonal contrast: unsupervised pre-training from uncurated remote sensing data, pp. 9414\u20139423 (2021). https:\/\/openaccess.thecvf.com\/content\/ICCV2021\/html\/Manas_Seasonal_Contrast_Unsupervised_Pre-Training_From_Uncurated_Remote_Sensing_Data_ICCV_2021_paper.html","DOI":"10.1109\/ICCV48922.2021.00928"},{"key":"10_CR35","unstructured":"NVIDIA: Resnet v1.5 for PyTorch (2023). https:\/\/catalog.ngc.nvidia.com\/orgs\/nvidia\/resources\/resnet_50_v1_5_for_pytorch. Accessed 24 Nov 2023"},{"key":"10_CR36","unstructured":"Oord, A.V.D., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding, January 2019. arXiv:1807.03748 [cs, stat].zSCC: NoCitationData[s0]"},{"key":"10_CR37","doi-asserted-by":"publisher","unstructured":"Ouaknine, A., Kattenborn, T., Lalibert\u00e9, E., Rolnick, D.: OpenForest: a data catalogue for machine learning in forest monitoring, November 2023. https:\/\/doi.org\/10.48550\/arXiv.2311.00277. http:\/\/arxiv.org\/abs\/2311.00277 [cs]","DOI":"10.48550\/arXiv.2311.00277"},{"key":"10_CR38","doi-asserted-by":"crossref","unstructured":"Pantazis, O., Brostow, G.J., Jones, K.E., Mac\u00a0Aodha, O.: Focus on the positives: self-supervised learning for biodiversity monitoring. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10583\u201310592, October 2021","DOI":"10.1109\/ICCV48922.2021.01041"},{"key":"10_CR39","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision (2021)"},{"key":"10_CR40","doi-asserted-by":"publisher","unstructured":"Randin, C.F., et al.: Monitoring biodiversity in the anthropocene using remote sensing in species distribution models. Remote Sens. Environ. 239, 111626 (2020). https:\/\/doi.org\/10.1016\/j.rse.2019.111626. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0034425719306467","DOI":"10.1016\/j.rse.2019.111626"},{"key":"10_CR41","unstructured":"Sagawa, S., et al.: Extending the WILDS benchmark for unsupervised adaptation (2022)"},{"key":"10_CR42","doi-asserted-by":"crossref","unstructured":"Sastry, S., Khanal, S., Dhakal, A., Huang, D., Jacobs, N.: BirdSAT: cross-view contrastive masked autoencoders for bird species classification and mapping. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 7136\u20137145 (2024)","DOI":"10.1109\/WACV57701.2024.00698"},{"key":"10_CR43","unstructured":"Shi, Y., Liu, L., Yu, X., Li, H.: Spatial-aware feature aggregation for image based cross-view geo-localization. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"10_CR44","doi-asserted-by":"crossref","unstructured":"Shugaev, M., et al.: ArcGeo: localizing limited field-of-view images using cross-view matching. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 209\u2013218, January 2024","DOI":"10.1109\/WACV57701.2024.00028"},{"key":"10_CR45","unstructured":"Swope, A.M., Rudelis, X.H., Story, K.T.: Representation learning for remote sensing: an unsupervised sensor fusion approach. CoRR abs\/2108.05094 (2021). https:\/\/arxiv.org\/abs\/2108.05094"},{"key":"10_CR46","unstructured":"Teng, M., et al.: SatBird: bird species distribution modeling with remote sensing and citizen science data. arXiv preprint arXiv:2311.00936 (2023)"},{"issue":"1","key":"10_CR47","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1038\/s41467-022-27980-y","volume":"13","author":"D Tuia","year":"2022","unstructured":"Tuia, D., et al.: Perspectives in machine learning for wildlife conservation. Nat. Commun. 13(1), 792 (2022)","journal-title":"Nat. Commun."},{"key":"10_CR48","unstructured":"United States Department of Agriculture: NAIP Imagery (2023). https:\/\/naip-usdaonline.hub.arcgis.com\/"},{"key":"10_CR49","doi-asserted-by":"crossref","unstructured":"Van\u00a0Horn, G., et al.: The iNaturalist species classification and detection dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018","DOI":"10.1109\/CVPR.2018.00914"},{"key":"10_CR50","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"494","DOI":"10.1007\/978-3-319-46448-0_30","volume-title":"Computer Vision \u2013 ECCV 2016","author":"NN Vo","year":"2016","unstructured":"Vo, N.N., Hays, J.: Localizing and orienting street views using overhead imagery. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 494\u2013509. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_30"},{"key":"10_CR51","doi-asserted-by":"crossref","unstructured":"Workman, S., Souvenir, R., Jacobs, N.: Wide-area image geolocalization with aerial reference imagery. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3961\u20133969 (2015)","DOI":"10.1109\/ICCV.2015.451"},{"key":"10_CR52","doi-asserted-by":"publisher","unstructured":"Zheng, Z., Wei, Y., Yang, Y.: University-1652: a multi-view multi-source benchmark for drone-based geo-localization. In: Proceedings of the 28th ACM International Conference on Multimedia, MM 2020, pp. 1395\u20131403. Association for Computing Machinery, New York, NY, USA (2020). https:\/\/doi.org\/10.1145\/3394171.3413896","DOI":"10.1145\/3394171.3413896"},{"key":"10_CR53","doi-asserted-by":"crossref","unstructured":"Zhu, S., Yang, T., Chen, C.: VIGOR: cross-view image geo-localization beyond one-to-one retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3640\u20133649, June 2021","DOI":"10.1109\/CVPR46437.2021.00364"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72989-8_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T17:11:48Z","timestamp":1729876308000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72989-8_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,26]]},"ISBN":["9783031729881","9783031729898"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72989-8_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,26]]},"assertion":[{"value":"26 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}