{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T12:51:17Z","timestamp":1774702277083,"version":"3.50.1"},"reference-count":57,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["4217010680"],"award-info":[{"award-number":["4217010680"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.knosys.2026.115677","type":"journal-article","created":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T20:35:40Z","timestamp":1772483740000},"page":"115677","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Dual-stage method with memory-augmented embedding learning and attention-guided re-ranking for robust visual place recognition"],"prefix":"10.1016","volume":"340","author":[{"given":"Qilong","family":"Wu","sequence":"first","affiliation":[]},{"given":"Peiwen","family":"Yao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2034-982X","authenticated-orcid":false,"given":"Lin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Haihong","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Yining","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Ying","family":"Zuo","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Yukun","family":"Wu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2026.115677_bib0001","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TRO.2015.2496823","article-title":"Visual place recognition: a survey","volume":"32","author":"Lowry","year":"2016","journal-title":"IEEE Trans. Robot."},{"key":"10.1016\/j.knosys.2026.115677_bib0002","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2020.107760","article-title":"Visual place recognition: a survey from deep learning perspective","volume":"113","author":"Zhang","year":"2021","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2026.115677_bib0003","doi-asserted-by":"crossref","first-page":"306","DOI":"10.1016\/j.isprsjprs.2024.05.006","article-title":"A coarse-to-fine visual geo-localization method for GNSS-denied UAV with oblique-view imagery","volume":"212","author":"Ye","year":"2024","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"key":"10.1016\/j.knosys.2026.115677_bib0004","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110993","article-title":"Transformer-based descriptors with fine-grained region supervisions for visual place recognition","volume":"280","author":"Wang","year":"2023","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.knosys.2026.115677_bib0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.111015","article-title":"Distilled representation using patch-based local-to-global similarity strategy for visual place recognition","volume":"280","author":"Zhang","year":"2023","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.knosys.2026.115677_bib0006","doi-asserted-by":"crossref","first-page":"2074","DOI":"10.1109\/TPAMI.2020.3032010","article-title":"Long-term visual localization revisited","volume":"44","author":"Toft","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115677_bib0007","doi-asserted-by":"crossref","first-page":"661","DOI":"10.1109\/TNNLS.2019.2908982","article-title":"Spatial pyramid-enhanced NetVLAD with weighted triplet loss for place recognition","volume":"31","author":"Yu","year":"2020","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.knosys.2026.115677_bib0008","doi-asserted-by":"crossref","first-page":"7355","DOI":"10.1109\/TITS.2020.3001228","article-title":"Memorable maps: a framework for re-defining places in visual place recognition","volume":"22","author":"Zaffar","year":"2021","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.knosys.2026.115677_bib0009","doi-asserted-by":"crossref","first-page":"1109","DOI":"10.1109\/TCSVT.2022.3212434","article-title":"Hybrid CNN-transformer features for visual place recognition","volume":"33","author":"Wang","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.knosys.2026.115677_bib0010","doi-asserted-by":"crossref","first-page":"2136","DOI":"10.1007\/s11263-021-01469-5","article-title":"VPR-bench: an open-source visual place recognition evaluation framework with quantifiable viewpoint and appearance change","volume":"129","author":"Zaffar","year":"2021","journal-title":"Int. J. Comput. Vision"},{"key":"10.1016\/j.knosys.2026.115677_bib0011","doi-asserted-by":"crossref","first-page":"1437","DOI":"10.1109\/TPAMI.2017.2711011","article-title":"NetVLAD: CNN architecture for weakly supervised place recognition","volume":"40","author":"Arandjelovic","year":"2018","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115677_bib0012","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4868","article-title":"Rethinking visual geo-localization for large-scale applications","author":"Berton","year":"2022"},{"key":"10.1016\/j.knosys.2026.115677_bib0013","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.109425","article-title":"Learning robust representation and sequence constraint for retrieval-based long-term visual place recognition","volume":"138","author":"Tan","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.knosys.2026.115677_bib0014","doi-asserted-by":"crossref","first-page":"152","DOI":"10.1109\/TCYB.2020.2977128","article-title":"A robust image-sequence-based framework for visual place recognition in changing environments","volume":"52","author":"Wang","year":"2022","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.knosys.2026.115677_bib0015","doi-asserted-by":"crossref","first-page":"8127","DOI":"10.1109\/TII.2022.3216619","article-title":"Neighborhood Manifold preserving matching for visual place recognition","volume":"19","author":"Ye","year":"2023","journal-title":"IEEE Trans. Ind. Informat."},{"key":"10.1016\/j.knosys.2026.115677_bib0016","series-title":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"14136","article-title":"Patch-NetVLAD: multi-scale fusion of locally-global descriptors for place recognition","author":"Hausler","year":"2021"},{"key":"10.1016\/j.knosys.2026.115677_bib0017","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13638","article-title":"TransVPR: transformer-based place recognition with multi-level attention aggregation","author":"Wang","year":"2022"},{"key":"10.1016\/j.knosys.2026.115677_bib0018","series-title":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"19370","article-title":"R2Former: unified retrieval and reranking transformer for place recognition","author":"Zhu","year":"2023"},{"key":"10.1016\/j.knosys.2026.115677_bib0019","series-title":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11217","article-title":"StructVPR: distill structural knowledge with weighting samples for visual place recognition","author":"Shen","year":"2023"},{"key":"10.1016\/j.knosys.2026.115677_bib0020","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly, J. Uszkoreit, N. Houlsby, An image is worth 16x16 words: transformers for image recognition at scale, (2021). https:\/\/doi.org\/10.48550\/arXiv.2010.11929."},{"key":"10.1016\/j.knosys.2026.115677_bib0021","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","article-title":"Distinctive image features from scale-invariant keypoints","volume":"60","author":"Lowe","year":"2004","journal-title":"Int. J. Comput. Vision"},{"key":"10.1016\/j.knosys.2026.115677_bib0022","series-title":"2011 International Conference on Computer Vision","first-page":"2564","article-title":"ORB: an efficient alternative to SIFT or SURF","author":"Rublee","year":"2011"},{"key":"10.1016\/j.knosys.2026.115677_bib0023","series-title":"Proceedings Ninth IEEE International Conference on Computer Vision","first-page":"1470","article-title":"Video Google: a text retrieval approach to object matching in videos","volume":"2","author":"Sivic","year":"2003"},{"key":"10.1016\/j.knosys.2026.115677_bib0024","series-title":"2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition","first-page":"3304","article-title":"Aggregating local descriptors into a compact image representation","author":"Jegou","year":"2010"},{"key":"10.1016\/j.knosys.2026.115677_bib0025","doi-asserted-by":"crossref","unstructured":"Y. Ge, H. Wang, F. Zhu, R. Zhao, H. Li, Self-supervising fine-grained region similarities for large-scale image localization, (2020). https:\/\/doi.org\/10.48550\/arXiv.2006.03926.","DOI":"10.1007\/978-3-030-58548-8_22"},{"key":"10.1016\/j.knosys.2026.115677_bib0026","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"17794","article-title":"BoQ: a place is worth a bag of learnable queries","author":"Ali-bey","year":"2024"},{"key":"10.1016\/j.knosys.2026.115677_bib0027","series-title":"2023 IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"2997","article-title":"MixVPR: feature mixing for visual place recognition","author":"Ali-Bey","year":"2023"},{"key":"10.1016\/j.knosys.2026.115677_bib0028","unstructured":"F. Lu, L. Zhang, X. Lan, S. Dong, Y. Wang, C. Yuan, Towards seamless adaptation of pre-trained models for visual place recognition, (2024). https:\/\/doi.org\/10.48550\/arXiv.2402.14505."},{"key":"10.1016\/j.knosys.2026.115677_bib0029","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4937","article-title":"SuperGlue: learning feature matching with graph neural networks","author":"Sarlin","year":"2020"},{"key":"10.1016\/j.knosys.2026.115677_bib0030","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1145\/358669.358692","article-title":"Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography","volume":"24","author":"Fischler","year":"1981","journal-title":"Commun. ACM"},{"key":"10.1016\/j.knosys.2026.115677_bib0031","unstructured":"M. Oquab, T. Darcet, T. Moutakanni, H. Vo, M. Szafraniec, V. Khalidov, P. Fernandez, D. Haziza, F. Massa, A. El-Nouby, M. Assran, N. Ballas, W. Galuba, R. Howes, P.-Y. Huang, S.-W. Li, I. Misra, M. Rabbat, V. Sharma, G. Synnaeve, H. Xu, H. Jegou, J. Mairal, P. Labatut, A. Joulin, P. Bojanowski, DINOv2: learning robust visual features without supervision, (2023). https:\/\/doi.org\/10.48550\/arXiv.2304.07193."},{"key":"10.1016\/j.knosys.2026.115677_bib0032","series-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","first-page":"23296","article-title":"Intriguing properties of vision transformers","author":"Naseer","year":"2021"},{"key":"10.1016\/j.knosys.2026.115677_bib0033","doi-asserted-by":"crossref","first-page":"1286","DOI":"10.1109\/LRA.2023.3343602","article-title":"AnyLoc: towards universal visual place recognition","volume":"9","author":"Keetha","year":"2024","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.knosys.2026.115677_bib0034","author":"Tzachor"},{"key":"10.1016\/j.knosys.2026.115677_bib0035","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1945","article-title":"Triplet-center loss for multi-view 3D object retrieval","author":"He","year":"2018"},{"key":"10.1016\/j.knosys.2026.115677_bib0036","first-page":"4591","article-title":"Adaptive neighborhood metric learning","volume":"44","author":"Song","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115677_bib0037","series-title":"Proceedings of the 16th International Conference on Neural Information Processing Systems","first-page":"521","article-title":"Distance metric learning, with application to clustering with side-information","author":"Xing","year":"2002"},{"key":"10.1016\/j.knosys.2026.115677_bib0038","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"4685","article-title":"ArcFace: additive angular margin loss for deep face recognition","author":"Deng","year":"2019"},{"key":"10.1016\/j.knosys.2026.115677_bib0039","series-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5017","article-title":"Multi-similarity loss with general pair weighting for deep metric learning","author":"Wang","year":"2019"},{"key":"10.1016\/j.knosys.2026.115677_bib0040","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"6387","article-title":"Cross-batch memory for embedding learning","author":"Wang","year":"2020"},{"key":"10.1016\/j.knosys.2026.115677_bib0041","series-title":"2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition - Volume 2 (CVPR\u201906)","first-page":"1735","article-title":"Dimensionality reduction by learning an invariant mapping","author":"Hadsell","year":"2006"},{"key":"10.1016\/j.knosys.2026.115677_bib0042","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3733","article-title":"Unsupervised feature learning via non-parametric instance discrimination","author":"Wu","year":"2018"},{"key":"10.1016\/j.knosys.2026.115677_bib0043","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9726","article-title":"Momentum contrast for unsupervised visual representation learning","author":"He","year":"2020"},{"key":"10.1016\/j.knosys.2026.115677_bib0044","series-title":"2012 IEEE 51st IEEE Conference on Decision and Control","first-page":"5451","article-title":"Distributed delayed stochastic optimization","author":"Agarwal","year":"2012"},{"key":"10.1016\/j.knosys.2026.115677_bib0045","series-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems","first-page":"315","article-title":"Accelerating stochastic gradient descent using predictive variance reduction","author":"Johnson","year":"2013"},{"key":"10.1016\/j.knosys.2026.115677_bib0046","doi-asserted-by":"crossref","first-page":"194","DOI":"10.1016\/j.neucom.2022.09.127","article-title":"GSV-cities: toward appropriate supervised visual place recognition","volume":"513","author":"Ali-bey","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.knosys.2026.115677_bib0047","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2623","article-title":"Mapillary street-level sequences: a dataset for lifelong place recognition","author":"Warburg","year":"2020"},{"key":"10.1016\/j.knosys.2026.115677_bib0048","doi-asserted-by":"crossref","first-page":"2346","DOI":"10.1109\/TPAMI.2015.2409868","article-title":"Visual place recognition with repetitive structures","volume":"37","author":"Torii","year":"2015","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115677_bib0049","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/TPAMI.2017.2667665","article-title":"24\/7 Place recognition by view synthesis","volume":"40","author":"Torii","year":"2018","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115677_bib0050","doi-asserted-by":"crossref","first-page":"4015","DOI":"10.1109\/LRA.2018.2859916","article-title":"Learning context flexible attention model for long-term visual place recognition","volume":"3","author":"Chen","year":"2018","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.knosys.2026.115677_bib0051","series-title":"Proc. of Workshop on Long-Term Autonomy, IEEE International Conference on Robotics and Automation (ICRA)","article-title":"Are we there yet? Challenging SeqSLAM on a 3000 km journey across all four seasons","author":"S\u00fcnderhauf","year":"2013"},{"key":"10.1016\/j.knosys.2026.115677_bib0052","series-title":"2022 26th International Conference on Pattern Recognition","first-page":"2749","article-title":"AmsterTime: a visual place recognition benchmark dataset for severe domain shift","author":"Yildiz","year":"2022"},{"key":"10.1016\/j.knosys.2026.115677_bib0053","series-title":"2021 IEEE Winter Conference on Applications of Computer Vision","first-page":"2917","article-title":"Adaptive-attentive geolocalization from few queries: a hybrid approach","author":"Moreno Berton","year":"2021"},{"key":"10.1016\/j.knosys.2026.115677_bib0054","unstructured":"I. Loshchilov, F. Hutter, Decoupled weight decay regularization, (2019). https:\/\/doi.org\/10.48550\/arXiv.1711.05101."},{"key":"10.1016\/j.knosys.2026.115677_bib0055","doi-asserted-by":"crossref","unstructured":"M. Douze, A. Guzhva, C. Deng, J. Johnson, G. Szilvasy, P.-E. Mazar\u00e9, M. Lomeli, L. Hosseini, H. J\u00e9gou, The Faiss library, (2025). https:\/\/doi.org\/10.48550\/arXiv.2401.08281.","DOI":"10.1109\/TBDATA.2025.3618474"},{"key":"10.1016\/j.knosys.2026.115677_bib0056","series-title":"2023 IEEE\/CVF International Conference on Computer Vision","first-page":"11046","article-title":"EigenPlaces: training viewpoint robust models for visual place recognition","author":"Berton","year":"2023"},{"key":"10.1016\/j.knosys.2026.115677_bib0057","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"17658","article-title":"Optimal transport aggregation for visual place recognition","author":"Izquierdo","year":"2024"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095070512600417X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095070512600417X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T12:09:24Z","timestamp":1774699764000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S095070512600417X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":57,"alternative-id":["S095070512600417X"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115677","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Dual-stage method with memory-augmented embedding learning and attention-guided re-ranking for robust visual place recognition","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115677","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115677"}}