{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T19:44:54Z","timestamp":1765568694284,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,3]]},"DOI":"10.1145\/3748636.3764598","type":"proceedings-article","created":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T19:11:30Z","timestamp":1765566690000},"page":"686-695","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AddressBind: Cross-modal Alignment of Addresses and Geocodes"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4724-4128","authenticated-orcid":false,"family":"Govind","sequence":"first","affiliation":[{"name":"Amazon, Bangalore, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6477-0376","authenticated-orcid":false,"given":"Sayan","family":"Putatunda","sequence":"additional","affiliation":[{"name":"Amazon, Bangalore, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6576-7788","authenticated-orcid":false,"given":"Saurabh","family":"Sohoney","sequence":"additional","affiliation":[{"name":"Amazon, Hyderabad, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"MTEB: Leaderboard for Massive Multilingual Text Embedding Benchmark. https:\/\/huggingface.co\/spaces\/mteb\/leaderboard. [Online","year":"2025","unstructured":"2025. MTEB: Leaderboard for Massive Multilingual Text Embedding Benchmark. https:\/\/huggingface.co\/spaces\/mteb\/leaderboard. 
[Online; accessed 25-May-2025]."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1080\/13658816.2018.1504949"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-19231-9_3"},{"key":"e_1_3_2_1_4_1","volume-title":"Vicente Vivanco Cepeda, and Mubarak Shah.","author":"Clark Brandon","year":"2023","unstructured":"Brandon Clark, Alec Kerrigan, Parth Parag Kulkarni, Vicente Vivanco Cepeda, and Mubarak Shah. 2023. Where We Are and What We're Looking At: Query Based Worldwide Image Geo-localization Using Hierarchies and Scenes. arXiv:2303.04249 [cs.CV]"},{"key":"e_1_3_2_1_5_1","volume-title":"MMTEB: Massive Multilingual Text Embedding Benchmark. arXiv:2502.13595 [cs.CL] https:\/\/arxiv.org\/abs\/2502.13595","author":"Kenneth Enevoldsen","year":"2025","unstructured":"Kenneth Enevoldsen et al. 2025. MMTEB: Massive Multilingual Text Embedding Benchmark. arXiv:2502.13595 [cs.CL] https:\/\/arxiv.org\/abs\/2502.13595"},{"key":"e_1_3_2_1_6_1","volume-title":"Simcse: Simple contrastive learning of sentence embeddings. arXiv preprint arXiv:2104.08821","author":"Gao Tianyu","year":"2021","unstructured":"Tianyu Gao, Xingcheng Yao, and Danqi Chen. 2021. Simcse: Simple contrastive learning of sentence embeddings. arXiv preprint arXiv:2104.08821 (2021)."},{"key":"e_1_3_2_1_7_1","volume-title":"Armand Joulin, and Ishan Misra.","author":"Girdhar Rohit","year":"2023","unstructured":"Rohit Girdhar, Alaaeldin El-Nouby, Zhuang Liu, Mannat Singh, Kalyan Vasudev Alwala, Armand Joulin, and Ishan Misra. 2023. ImageBind: One Embedding Space To Bind Them All. arXiv:2305.05665 [cs.CV]"},{"key":"e_1_3_2_1_8_1","volume-title":"Linking Theory and Practice of Digital Libraries","author":"Alec C\u00e9line","unstructured":"Govind, C\u00e9line Alec, Jean-Luc Manguin, and Marc Spaniol. 2021. FETD2: A Framework for Enabling Textual Data Denoising via Robust Contextual Embeddings. 
In Linking Theory and Practice of Digital Libraries, Gerd Berget, Mark Michael Hall, Daniel Brenn, and Sanna Kumpulainen (Eds.). Springer International Publishing, Cham, 3\u201316."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-industry.33"},{"key":"e_1_3_2_1_10_1","volume-title":"2008 IEEE Conference on Computer Vision and Pattern Recognition (2008","author":"Hays James","year":"2008","unstructured":"James Hays and Alexei A. Efros. 2008. IM2GPS: estimating geographic information from a single image. 2008 IEEE Conference on Computer Vision and Pattern Recognition (2008), 1\u20138. https:\/\/api.semanticscholar.org\/CorpusID:2061602"},{"key":"e_1_3_2_1_11_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7\u20139, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_2_1_12_1","volume-title":"General-Purpose Location Embeddings with Satellite Imagery. arXiv preprint arXiv:2311.17179","author":"Klemmer Konstantin","year":"2023","unstructured":"Konstantin Klemmer, Esther Rolf, Caleb Robinson, Lester Mackey, and Marc Ru\u00dfwurm. 2023. SatCLIP: Global, General-Purpose Location Embeddings with Satellite Imagery. arXiv preprint arXiv:2311.17179 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"GeoChat: Grounded Large Vision-Language Model for Remote Sensing. The IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Kuckreja Kartik","year":"2024","unstructured":"Kartik Kuckreja, Muhammad S. Danish, Muzammal Naseer, Abhijit Das, Salman Khan, and Fahad S. Khan. 2024. GeoChat: Grounded Large Vision-Language Model for Remote Sensing. 
The IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2024)."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). JMLR.org, Article 814, 13 pages."},{"key":"e_1_3_2_1_15_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. arXiv:2201.12086 [cs.CV]","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. arXiv:2201.12086 [cs.CV]"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611975673.8"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. 10.48550\/ARXIV.1907.11692","DOI":"10.48550\/ARXIV.1907.11692"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543873.3584647"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Mai Gengchen","year":"2023","unstructured":"Gengchen Mai, Ni Lao, Yutong He, Jiaming Song, and Stefano Ermon. 2023. CSP: self-supervised contrastive spatial pre-training for geospatial-visual representations. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). 
JMLR.org, Article 981, 18 pages."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3557915.3561016"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3390\/ijgi9120698"},{"key":"e_1_3_2_1_23_1","volume-title":"Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arXiv:2103.00020 [cs.CV]"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/2981562.2981710"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_26_1","unstructured":"Marc Ru\u00dfwurm Konstantin Klemmer Esther Rolf Robin Zbinden and Devis Tuia. 2024. Geographic Location Encoding with Spherical Harmonics and Sinusoidal Representation Networks. ICLR. arXiv:2310.06743"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-industry.29"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397536.3422254"},{"key":"e_1_3_2_1_29_1","unstructured":"Matthew Tancik Pratul P. Srinivasan Ben Mildenhall Sara Fridovich-Keil Nithin Raghavan Utkarsh Singhal Ravi Ramamoorthi Jonathan T. Barron and Ren Ng. 2020. Fourier Features Let Networks Learn High Frequency Functions in Low Dimensional Domains. arXiv:2006.10739 [cs.CV]"},{"key":"e_1_3_2_1_30_1","unstructured":"Aaron van den Oord Yazhe Li and Oriol Vinyals. 2019. Representation Learning with Contrastive Predictive Coding. 
arXiv:1807.03748 [cs.LG] https:\/\/arxiv.org\/abs\/1807.03748"},{"key":"e_1_3_2_1_31_1","volume-title":"Gaurav Kumar Nayak, and Mubarak Shah","author":"Vivanco Vicente","year":"2023","unstructured":"Vicente Vivanco, Gaurav Kumar Nayak, and Mubarak Shah. 2023. GeoCLIP: Clip-Inspired Alignment between Locations and Images for Effective Worldwide Geo-localization. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_32_1","unstructured":"Liang Wang Nan Yang Xiaolong Huang Linjun Yang Rangan Majumder and Furu Wei. 2024. Multilingual E5 Text Embeddings: A Technical Report. arXiv:2402.05672 [cs.CL] https:\/\/arxiv.org\/abs\/2402.05672"},{"key":"e_1_3_2_1_33_1","unstructured":"Ellen D. Zhong Tristan Bepler Joseph H. Davis and Bonnie Berger. 2020. Reconstructing continuous distributions of 3D protein structure from cryo-EM images. arXiv:1909.05215 [q-bio.QM]"},{"key":"e_1_3_2_1_34_1","volume-title":"Zhifeng Li, Wei Liu, and Li Yuan.","author":"Zhu Bin","year":"2023","unstructured":"Bin Zhu, Bin Lin, Munan Ning, Yang Yan, Jiaxi Cui, Wang HongFa, Yatian Pang, Wenhao Jiang, Junwu Zhang, Zongwei Li, Cai Wan Zhang, Zhifeng Li, Wei Liu, and Li Yuan. 2023. LanguageBind: Extending Video-Language Pretraining to N-modality by Language-based Semantic Alignment. 
arXiv:2310.01852 [cs.CV]"}],"event":{"name":"SIGSPATIAL '25: 33rd ACM International Conference on Advances in Geographic Information Systems","location":"The Graduate Hotel Minneapolis Minneapolis MN USA","acronym":"SIGSPATIAL '25","sponsor":["SIGSPATIAL ACM Special Interest Group on Spatial Information"]},"container-title":["Proceedings of the 33rd ACM International Conference on Advances in Geographic Information Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3748636.3764598","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T19:13:50Z","timestamp":1765566830000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3748636.3764598"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,3]]},"references-count":34,"alternative-id":["10.1145\/3748636.3764598","10.1145\/3748636"],"URL":"https:\/\/doi.org\/10.1145\/3748636.3764598","relation":{},"subject":[],"published":{"date-parts":[[2025,11,3]]},"assertion":[{"value":"2025-12-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}