{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T01:50:27Z","timestamp":1778896227057,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681431","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"2574-2583","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["MFRGN: Multi-scale Feature Representation Generalization Network for Ground-to-Aerial Geo-localization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5232-008X","authenticated-orcid":false,"given":"Yuntao","family":"Wang","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0617-0452","authenticated-orcid":false,"given":"Jinpu","family":"Zhang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2562-6021","authenticated-orcid":false,"given":"Ruonan","family":"Wei","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8640-2222","authenticated-orcid":false,"given":"Wenbo","family":"Gao","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7046-7587","authenticated-orcid":false,"given":"Yuehuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"NetVLAD: CNN Architecture for Weakly Supervised Place Recognition","author":"Arandjelovic Relja","year":"2018","unstructured":"Relja Arandjelovic, Petr Gronat, Akihiko Torii, Tomas Pajdla, and Josef Sivic. 2018. NetVLAD: CNN Architecture for Weakly Supervised Place Recognition. IEEE transactions on pattern analysis and machine intelligence, Vol. 40, 6 (2018), 1437--1451."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2071954"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744023_32"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00170"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00848"},{"key":"e_1_3_2_1_6_1","volume-title":"Fast and accurate deep network learning by exponential linear units (elus). arXiv preprint arXiv:1511.07289","author":"Clevert Djork-Arn\u00e9","year":"2015","unstructured":"Djork-Arn\u00e9 Clevert, Thomas Unterthiner, and Sepp Hochreiter. 2015. Fast and accurate deep network learning by exponential linear units (elus). arXiv preprint arXiv:1511.07289 (2015)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01545"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02071"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00758"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612007"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548102"},{"key":"e_1_3_2_1_14_1","volume-title":"Aggregating local descriptors into a compact image representation. In 2010 IEEE computer society conference on computer vision and pattern recognition","author":"J\u00e9gou Herv\u00e9","unstructured":"Herv\u00e9 J\u00e9gou, Matthijs Douze, Cordelia Schmid, and Patrick P\u00e9rez. 2010. Aggregating local descriptors into a compact image representation. In 2010 IEEE computer society conference on computer vision and pattern recognition. IEEE, 3304--3311."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.346"},{"key":"e_1_3_2_1_16_1","volume-title":"International conference on machine learning. PMLR, 5156--5165","author":"Katharopoulos Angelos","year":"2020","unstructured":"Angelos Katharopoulos, Apoorv Vyas, Nikolaos Pappas, and Franccois Fleuret. 2020. Transformers are rnns: Fast autoregressive transformers with linear attention. In International conference on machine learning. PMLR, 5156--5165."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989239"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00819"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01780"},{"key":"e_1_3_2_1_20_1","unstructured":"Guopeng Li Ming Qian and Gui-Song Xia. 2024. Unleashing Unlabeled Data: A Paradigm for Cross-View Geo-Localization."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.120"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00577"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"e_1_3_2_1_24_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i9.28901"},{"key":"e_1_3_2_1_27_1","volume-title":"Towards seamless adaptation of pre-trained models for visual place recognition. arXiv preprint arXiv:2402.14505","author":"Lu Feng","year":"2024","unstructured":"Feng Lu, Lijun Zhang, Xiangyuan Lan, Shuting Dong, Yaowei Wang, and Chun Yuan. 2024. Towards seamless adaptation of pre-trained models for visual place recognition. arXiv preprint arXiv:2402.14505 (2024)."},{"key":"e_1_3_2_1_28_1","first-page":"1","article-title":"It's okay to be wrong: Cross-view geo-localization with step-adaptive iterative refinement","volume":"60","author":"Lu Xiufan","year":"2022","unstructured":"Xiufan Lu, Siqi Luo, and Yingying Zhu. 2022. It's okay to be wrong: Cross-view geo-localization with step-adaptive iterative refinement. IEEE Transactions on Geoscience and Remote Sensing, Vol. 60 (2022), 1--13.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_29_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_1_30_1","volume-title":"Cross-View Visual Geo-Localization for Outdoor Augmented Reality. In 2023 IEEE Conference Virtual Reality and 3D User Interfaces (VR). IEEE, 493--502","author":"Mithun Niluthpol Chowdhury","year":"2023","unstructured":"Niluthpol Chowdhury Mithun, Kshitij S Minhas, Han-Pang Chiu, Taragay Oskiper, Mikhail Sizintsev, Supun Samarasekera, and Rakesh Kumar. 2023. Cross-View Visual Geo-Localization for Outdoor Augmented Reality. In 2023 IEEE Conference Virtual Reality and 3D User Interfaces (VR). IEEE, 493--502."},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the 29th International Conference on Machine Learning, ICML 2012","volume":"2","author":"Mnih A","year":"2012","unstructured":"A Mnih and Y Teh. 2012. A fast and simple algorithm for training neural probabilistic language models. In Proceedings of the 29th International Conference on Machine Learning, ICML 2012, Vol. 2."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00056"},{"key":"e_1_3_2_1_33_1","volume-title":"NeurIPS","volume":"30","author":"Sabour Sara","year":"2017","unstructured":"Sara Sabour, Nicholas Frosst, and Geoffrey E Hinton. 2017. Dynamic routing between capsules. NeurIPS, Vol. 30 (2017)."},{"key":"e_1_3_2_1_34_1","volume-title":"MCCG: A ConvNeXt-based multiple-classifier method for cross-view geo-localization","author":"Shen Tianrui","year":"2023","unstructured":"Tianrui Shen, Yingmei Wei, Lai Kang, Shanshan Wan, and Yee-Hong Yang. 2023. MCCG: A ConvNeXt-based multiple-classifier method for cross-view geo-localization. IEEE Transactions on Circuits and Systems for Video Technology (2023)."},{"key":"e_1_3_2_1_35_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Shi Yujiao","year":"2019","unstructured":"Yujiao Shi, Liu Liu, Xin Yu, and Hongdong Li. 2019. Spatial-aware feature aggregation for image based cross-view geo-localization. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01967"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00412"},{"key":"e_1_3_2_1_38_1","first-page":"2682","article-title":"Accurate 3-DoF camera geo-localization via ground-to-satellite image matching","volume":"45","author":"Shi Yujiao","year":"2022","unstructured":"Yujiao Shi, Xin Yu, Liu Liu, Dylan Campbell, Piotr Koniusz, and Hongdong Li. 2022. Accurate 3-DoF camera geo-localization via ground-to-satellite image matching. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 45, 3 (2022), 2682--2697.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_39_1","volume-title":"Geocapsnet: Aerial to ground view image geo-localization using capsule network. arXiv preprint arXiv:1904.06281","author":"Sun Bin","year":"2019","unstructured":"Bin Sun, Chen Chen, Yingying Zhu, and Jianmin Jiang. 2019. Geocapsnet: Aerial to ground view image geo-localization using capsule network. arXiv preprint arXiv:1904.06281 (2019)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00642"},{"key":"e_1_3_2_1_41_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_42_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6942560"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_30"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01328"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3061265"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs14112575"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.451"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3607834.3616568"},{"key":"e_1_3_2_1_50_1","first-page":"29009","article-title":"Cross-view geo-localization with layer-to-layer transformer","volume":"34","author":"Yang Hongji","year":"2021","unstructured":"Hongji Yang, Xiufan Lu, and Yingying Zhu. 2021. Cross-view geo-localization with layer-to-layer transformer. Advances in Neural Information Processing Systems, Vol. 34 (2021), 29009--29020.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_19"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.440"},{"key":"e_1_3_2_1_53_1","volume-title":"GeoDTR: Toward generic cross-view geolocalization via geometric disentanglement. arXiv preprint arXiv:2308.09624","author":"Zhang Xiaohan","year":"2023","unstructured":"Xiaohan Zhang, Xingyu Li, Waqas Sultani, Chen Chen, and Safwan Wshah. 2023. GeoDTR: Toward generic cross-view geolocalization via geometric disentanglement. arXiv preprint arXiv:2308.09624 (2023)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25457"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413896"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00123"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00364"},{"key":"e_1_3_2_1_59_1","volume-title":"effective and general: A new backbone for cross-view image geo-localization. arXiv preprint arXiv:2302.01572","author":"Zhu Yingying","year":"2023","unstructured":"Yingying Zhu, Hongji Yang, Yuxin Lu, and Qiang Huang. 2023. Simple, effective and general: A new backbone for cross-view image geo-localization. arXiv preprint arXiv:2302.01572 (2023)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681431","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681431","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:46Z","timestamp":1750294666000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681431"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":59,"alternative-id":["10.1145\/3664647.3681431","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681431","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}