{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T08:12:26Z","timestamp":1779264746068,"version":"3.51.4"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.neunet.2026.109099","type":"journal-article","created":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:04:15Z","timestamp":1778756655000},"page":"109099","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Window-to-window BEV representation learning for limited FoV cross-view geo-localization"],"prefix":"10.1016","volume":"203","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0664-6194","authenticated-orcid":false,"given":"Lei","family":"Cheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daikun","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lingquan","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1802-0435","authenticated-orcid":false,"given":"Teng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changyin","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"issue":"1","key":"10.1016\/j.neunet.2026.109099_bib0001","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1109\/TIV.2024.3411098","article-title":"Offset regression enhanced cross-view feature interaction for ground-to-aerial geo-localization","volume":"10","author":"Cheng","year":"2025","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"issue":"7","key":"10.1016\/j.neunet.2026.109099_bib0002","doi-asserted-by":"crossref","first-page":"4376","DOI":"10.1109\/TCSVT.2021.3135013","article-title":"A transformer-based feature segmentation and region alignment method for UAV-view geo-localization","volume":"32","author":"Dai","year":"2021","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.109099_bib0003","article-title":"C3aptioner: Improving change captioning by leveraging momentum cross-view and cross-modality contrastive learning","volume":"193","author":"Deng","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.109099_bib0004","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"16847","article-title":"Sample4Geo: Hard negative sampling for cross-view geo-localisation","author":"Deuser","year":"2023"},{"key":"10.1016\/j.neunet.2026.109099_bib0005","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S. et al. (2020). An image is worth 16x16 words: Transformers for image recognition at scale. arXiv: 2010.11929."},{"issue":"11","key":"10.1016\/j.neunet.2026.109099_bib0006","doi-asserted-by":"crossref","first-page":"11630","DOI":"10.1109\/TCSVT.2024.3425509","article-title":"CCR: A counterfactual causal reasoning-based method for cross-view geo-localization","volume":"34","author":"Du","year":"2024","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.109099_bib0007","unstructured":"Fervers, F., Bullinger, S., Bodensteiner, C., Arens, M., & Stiefelhagen, R. (2023a). C-BEV: Contrastive bird\u2019s eye view training for cross-view image retrieval and 3-DoF pose estimation. arXiv: 2312.08060."},{"key":"10.1016\/j.neunet.2026.109099_bib0008","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"21621","article-title":"Uncertainty-aware vision-based metric cross-view geolocalization","author":"Fervers","year":"2023"},{"key":"10.1016\/j.neunet.2026.109099_bib0009","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"12595","article-title":"AugFPN: Improving multi-scale feature learning for object detection","author":"Guo","year":"2020"},{"key":"10.1016\/j.neunet.2026.109099_bib0010","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"7258","article-title":"CVM-Net: Cross-view matching network for image-based ground-to-aerial geo-localization","author":"Hu","year":"2018"},{"key":"10.1016\/j.neunet.2026.109099_bib0011","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"8100","article-title":"Cross-view policy learning for street navigation","author":"Li","year":"2019"},{"key":"10.1016\/j.neunet.2026.109099_bib0012","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2025.107675","article-title":"BEVFix: Deep feature enhancement for robust 3D object detection","volume":"190","author":"Li","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.109099_bib0013","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"1477","article-title":"BEVDepth: Acquisition of reliable depth for multi-view 3D object detection","author":"Li","year":"2023"},{"key":"10.1016\/j.neunet.2026.109099_bib0014","series-title":"European conference on computer vision","first-page":"1","article-title":"BEVFormer: Learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers","author":"Li","year":"2022"},{"key":"10.1016\/j.neunet.2026.109099_bib0015","series-title":"2022\u202fIEEE International conference on multimedia and expo (ICME)","first-page":"1","article-title":"Cat: Cross attention in vision transformer","author":"Lin","year":"2022"},{"key":"10.1016\/j.neunet.2026.109099_bib0016","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"5007","article-title":"Learning deep representations for ground-to-aerial geolocalization","author":"Lin","year":"2015"},{"key":"10.1016\/j.neunet.2026.109099_bib0017","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"2117","article-title":"Feature pyramid networks for object detection","author":"Lin","year":"2017"},{"key":"10.1016\/j.neunet.2026.109099_bib0018","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"5624","article-title":"Lending orientation to neural networks for cross-view geo-localization","author":"Liu","year":"2019"},{"key":"10.1016\/j.neunet.2026.109099_bib0019","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"11976","article-title":"A convnet for the 2020s","author":"Liu","year":"2022"},{"key":"10.1016\/j.neunet.2026.109099_bib0020","unstructured":"Loshchilov, I., & Hutter, F. (2017). Decoupled weight decay regularization. arXiv: 1711.05101."},{"key":"10.1016\/j.neunet.2026.109099_bib0021","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"859","article-title":"Geometry-aware satellite-to-ground image synthesis for urban areas","author":"Lu","year":"2020"},{"key":"10.1016\/j.neunet.2026.109099_bib0022","doi-asserted-by":"crossref","unstructured":"Mi, L., Xu, C., Castillo-Navarro, J., Montariol, S., Yang, W., Bosselut, A., & Tuia, D. (2024). ConGeo: Robust cross-view geo-localization across ground view variations. arXiv: 2403.13965.","DOI":"10.1007\/978-3-031-72630-9_13"},{"key":"10.1016\/j.neunet.2026.109099_bib0039","unstructured":"Oord A.v.d., Li, Y., & Vinyals, O. (2018). Representation learning with contrastive predictive coding. arXiv: 1807.03748."},{"key":"10.1016\/j.neunet.2026.109099_bib0023","series-title":"Computer vision\u2013ECCV 2020: 16th european conference, glasgow, UK, august 23\u201328, 2020, proceedings, part XIV 16","first-page":"194","article-title":"Lift, splat, shoot: Encoding images from arbitrary camera rigs by implicitly unprojecting to 3d","author":"Philion","year":"2020"},{"key":"10.1016\/j.neunet.2026.109099_bib0024","series-title":"International conference on machine learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.neunet.2026.109099_bib0025","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"470","article-title":"Bridging the domain gap for ground-to-aerial image matching","author":"Regmi","year":"2019"},{"key":"10.1016\/j.neunet.2026.109099_bib0026","series-title":"Proceedings of the IEEE\/CVF winter conference on applications of computer vision","first-page":"3871","article-title":"Global assists local: Effective aerial representations for field of view constrained image geo-localization","author":"Rodrigues","year":"2022"},{"key":"10.1016\/j.neunet.2026.109099_bib0027","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"618","article-title":"Grad-CAM: Visual explanations from deep networks via gradient-based localization","author":"Selvaraju","year":"2017"},{"issue":"3","key":"10.1016\/j.neunet.2026.109099_bib0028","doi-asserted-by":"crossref","first-page":"1456","DOI":"10.1109\/TCSVT.2023.3296074","article-title":"MCCG: A convnext-based multiple-classifier method for cross-view geo-localization","volume":"34","author":"Shen","year":"2024","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.109099_bib0029","series-title":"2019 International conference on robotics and automation (ICRA)","first-page":"1827","article-title":"Uav pose estimation using cross-view geolocalization with satellite imagery","author":"Shetty","year":"2019"},{"key":"10.1016\/j.neunet.2026.109099_bib0030","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2025.107704","article-title":"Polarfusion: A multi-modal fusion algorithm for 3D object detection based on polar coordinates","volume":"190","author":"Shi","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.109099_bib0031","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"17010","article-title":"Beyond cross-view image retrieval: Highly accurate vehicle localization using satellite image","author":"Shi","year":"2022"},{"key":"10.1016\/j.neunet.2026.109099_bib0032","first-page":"10090","article-title":"Spatial-aware feature aggregation for image based cross-view geo-localization","volume":"32","author":"Shi","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.109099_bib0033","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"21516","article-title":"Boosting 3-DoF ground-to-satellite camera localization accuracy via geometry-guided cross-view transformer","author":"Shi","year":"2023"},{"key":"10.1016\/j.neunet.2026.109099_bib0034","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"4064","article-title":"Where am i looking at? Joint location and orientation estimation by cross-view matching","author":"Shi","year":"2020"},{"issue":"3","key":"10.1016\/j.neunet.2026.109099_bib0035","first-page":"2682","article-title":"Accurate 3-DoF camera geo-localization via ground-to-satellite image matching","volume":"45","author":"Shi","year":"2022","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"10.1016\/j.neunet.2026.109099_bib0036","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"11990","article-title":"Optimal feature transport for cross-view image geo-localization","volume":"vol. 34","author":"Shi","year":"2020"},{"key":"10.1016\/j.neunet.2026.109099_bib0037","series-title":"Proceedings of the IEEE\/CVF winter conference on applications of computer vision","first-page":"209","article-title":"ArcGeo: Localizing limited field-of-view images using cross-view matching","author":"Shugaev","year":"2024"},{"key":"10.1016\/j.neunet.2026.109099_bib0038","first-page":"6000","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"10.1016\/j.neunet.2026.109099_bib0040","series-title":"Computer vision\u2013ECCV 2016: 14th European conference, amsterdam, the Netherlands, October 11\u201314, 2016, proceedings, Part I 14","first-page":"494","article-title":"Localizing and orienting street views using overhead imagery","author":"Vo","year":"2016"},{"issue":"3","key":"10.1016\/j.neunet.2026.109099_bib0041","doi-asserted-by":"crossref","first-page":"1927","DOI":"10.1109\/TCSVT.2023.3293514","article-title":"DeHi: A decoupled hierarchical architecture for unaligned ground-to-aerial geo-localization","volume":"34","author":"Wang","year":"2024","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"issue":"2","key":"10.1016\/j.neunet.2026.109099_bib0042","doi-asserted-by":"crossref","first-page":"867","DOI":"10.1109\/TCSVT.2021.3061265","article-title":"Each part matters: Local patterns facilitate cross-view geo-localization","volume":"32","author":"Wang","year":"2021","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.109099_bib0043","first-page":"5301","article-title":"Fine-grained cross-view geo-localization using a correlation-aware homography estimator","volume":"36","author":"Wang","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.109099_bib0044","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"3961","article-title":"Wide-area image geolocalization with aerial reference imagery","author":"Workman","year":"2015"},{"issue":"12","key":"10.1016\/j.neunet.2026.109099_bib0045","doi-asserted-by":"crossref","first-page":"13271","DOI":"10.1109\/TCSVT.2024.3443510","article-title":"Enhancing cross-view geo-localization with domain alignment and scene consistency","volume":"34","author":"Xia","year":"2024","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.109099_bib0046","first-page":"29009","article-title":"Cross-view geo-localization with layer-to-layer transformer","volume":"34","author":"Yang","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.neunet.2026.109099_bib0047","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"867","article-title":"Predicting ground-level scene layout from aerial imagery","author":"Zhai","year":"2017"},{"key":"10.1016\/j.neunet.2026.109099_bib0048","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"3480","article-title":"Cross-view geo-localization via learning disentangled geometric layout correspondence","volume":"vol. 37","author":"Zhang","year":"2023"},{"key":"10.1016\/j.neunet.2026.109099_bib0049","series-title":"Proceedings of the 28th ACM international conference on multimedia","first-page":"1395","article-title":"University-1652: A multi-view multi-source benchmark for drone-based geo-localization","author":"Zheng","year":"2020"},{"issue":"9","key":"10.1016\/j.neunet.2026.109099_bib0050","doi-asserted-by":"crossref","first-page":"4825","DOI":"10.1109\/TCSVT.2023.3249204","article-title":"SUES-200: A multi-height multi-scene cross-view image benchmark across drone and satellite","volume":"33","author":"Zhu","year":"2023","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.109099_bib0051","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1162","article-title":"TransGeo: Transformer is all you need for cross-view image geo-localization","author":"Zhu","year":"2022"},{"key":"10.1016\/j.neunet.2026.109099_bib0052","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"3640","article-title":"VIGOR: Cross-view image geo-localization beyond one-to-one retrieval","author":"Zhu","year":"2021"},{"key":"10.1016\/j.neunet.2026.109099_bib0053","unstructured":"Zhu, Y., Yang, H., Lu, Y., & Huang, Q. (2023b). Simple, effective and general: A new backbone for cross-view image geo-localization. arXiv: 2302.01572."}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026005599?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026005599?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T07:45:54Z","timestamp":1779263154000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026005599"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":53,"alternative-id":["S0893608026005599"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.109099","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Window-to-window BEV representation learning for limited FoV cross-view geo-localization","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.109099","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"109099"}}