{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T21:18:34Z","timestamp":1776979114866,"version":"3.51.4"},"reference-count":63,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T00:00:00Z","timestamp":1774483200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neucom.2026.133399","type":"journal-article","created":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T17:24:07Z","timestamp":1773854647000},"page":"133399","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["TF-VPR: A novel benchmark for training-free visual place recognition"],"prefix":"10.1016","volume":"681","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6139-4327","authenticated-orcid":false,"given":"Chenxu","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingtong","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bonan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9142-697X","authenticated-orcid":false,"given":"Fusen","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133399_bib0005","doi-asserted-by":"crossref","first-page":"3019","DOI":"10.1109\/TRO.2025.3550771","article-title":"General place recognition survey: towards real-world autonomy","volume":"41","author":"Yin","year":"2025","journal-title":"IEEE Trans. Robot."},{"issue":"5","key":"10.1016\/j.neucom.2026.133399_bib0010","doi-asserted-by":"crossref","first-page":"1255","DOI":"10.1109\/TRO.2017.2705103","article-title":"ORB-SLAM2: an open-source slam system for monocular, stereo, and RGB-D cameras","volume":"33","author":"Mur-Artal","year":"2017","journal-title":"IEEE Trans. Robot."},{"issue":"5","key":"10.1016\/j.neucom.2026.133399_bib0015","doi-asserted-by":"crossref","first-page":"1038","DOI":"10.1109\/TRO.2008.2004520","article-title":"Mapping a suburb with a single camera using a biologically inspired slam system","volume":"24","author":"Milford","year":"2008","journal-title":"IEEE Trans. Robot."},{"key":"10.1016\/j.neucom.2026.133399_bib0020","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"2623","article-title":"Mapillary street-level sequences: a dataset for lifelong place recognition","author":"Warburg","year":"2020"},{"key":"10.1016\/j.neucom.2026.133399_bib0025","series-title":"2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5297","article-title":"NetVLAD: CNN architecture for weakly supervised place recognition","author":"Arandjelovic","year":"2016"},{"key":"10.1016\/j.neucom.2026.133399_bib0030","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"11080","article-title":"EigenPlaces: training viewpoint robust models for visual place recognition","author":"Berton","year":"2023"},{"key":"10.1016\/j.neucom.2026.133399_bib0035","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"14141","article-title":"Patch-NetVLAD: multi-scale fusion of locally-global descriptors for place recognition","author":"Hausler","year":"2021"},{"key":"10.1016\/j.neucom.2026.133399_bib0040","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","first-page":"2997","article-title":"MixVPR: feature mixing for visual place recognition","author":"Ali-Bey","year":"2023"},{"key":"10.1016\/j.neucom.2026.133399_bib0045","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.neucom.2026.133399_bib0050","author":"Dosovitskiy"},{"key":"10.1016\/j.neucom.2026.133399_bib0055","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"16000","article-title":"Masked autoencoders are scalable vision learners","author":"He","year":"2022"},{"key":"10.1016\/j.neucom.2026.133399_bib0060","author":"Oquab"},{"issue":"5","key":"10.1016\/j.neucom.2026.133399_bib0065","doi-asserted-by":"crossref","first-page":"1188","DOI":"10.1109\/TRO.2012.2197158","article-title":"Bags of binary words for fast place recognition in image sequences","volume":"28","author":"Galvez-L\u00f3pez","year":"2012","journal-title":"IEEE Trans. Robot."},{"key":"10.1016\/j.neucom.2026.133399_bib0070","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"3304","article-title":"Aggregating local descriptors into a compact image representation","author":"J\u00e9gou","year":"2010"},{"key":"10.1016\/j.neucom.2026.133399_bib0075","article-title":"ImageNet classification with deep convolutional neural networks","volume":"25","author":"Krizhevsky","year":"2012","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133399_bib0080","author":"Simonyan"},{"key":"10.1016\/j.neucom.2026.133399_bib0085","author":"Musgrave"},{"key":"10.1016\/j.neucom.2026.133399_bib0090","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"4878","article-title":"Rethinking visual geo-localization for large-scale applications","author":"Berton","year":"2022"},{"issue":"4","key":"10.1016\/j.neucom.2026.133399_bib0095","doi-asserted-by":"crossref","first-page":"4013","DOI":"10.1109\/LRA.2025.3546512","article-title":"Pair-VPR: place-aware pre-training and contrastive pair classification for visual place recognition with vision transformers","volume":"10","author":"Hausler","year":"2025","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.neucom.2026.133399_bib0100","series-title":"2020 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"3327","article-title":"Hierarchical multi-process fusion for visual place recognition","author":"Hausler","year":"2020"},{"issue":"3","key":"10.1016\/j.neucom.2026.133399_bib0105","doi-asserted-by":"crossref","first-page":"4305","DOI":"10.1109\/LRA.2021.3067633","article-title":"SeqNet: learning descriptors for sequence-based hierarchical place recognition","volume":"6","author":"Garg","year":"2021","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"5","key":"10.1016\/j.neucom.2026.133399_bib0110","doi-asserted-by":"crossref","first-page":"4746","DOI":"10.1109\/LRA.2025.3554103","article-title":"On motion blur and deblurring in visual place recognition","volume":"10","author":"Ismagilov","year":"2025","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.neucom.2026.133399_bib0115","author":"Peng"},{"key":"10.1016\/j.neucom.2026.133399_bib0120","series-title":"Computer Vision \u2013 ECCV 2024, Vol. 15126 of Lecture Notes in Computer Science","first-page":"326","article-title":"Revisit anything: visual place recognition via image segment retrieval","author":"Garg","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0125","author":"Lu"},{"key":"10.1016\/j.neucom.2026.133399_bib0130","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1016\/j.neucom.2021.06.038","article-title":"A visual place recognition approach using learnable feature map filtering and graph attention networks","volume":"457","author":"Qin","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133399_bib0135","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111673","article-title":"PRGS: patch-to-region graph search for visual place recognition","volume":"166","author":"Zuo","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133399_bib0140","author":"Chen"},{"issue":"2","key":"10.1016\/j.neucom.2026.133399_bib0145","doi-asserted-by":"crossref","first-page":"1286","DOI":"10.1109\/LRA.2023.3343602","article-title":"AnyLoc: towards universal visual place recognition","volume":"9","author":"Keetha","year":"2024","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.neucom.2026.133399_bib0150","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"17658","article-title":"Optimal transport aggregation for visual place recognition","author":"Izquierdo","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0155","series-title":"Proceedings of the 40th International Conference on Machine Learning (ICML), Vol. 202 of Proceedings of Machine Learning Research","first-page":"8090","article-title":"Open-vocabulary universal image segmentation with MaskCLIP","author":"Ding","year":"2023"},{"key":"10.1016\/j.neucom.2026.133399_bib0160","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13171","article-title":"CLIP as RNN: segment countless visual concepts without training endeavor","author":"Sun","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0165","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3689","article-title":"Training-free open-vocabulary segmentation with offline diffusion-augmented prototype generation","author":"Barsellotti","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0170","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111409","article-title":"A closer look at the explainability of contrastive language-image pre-training","volume":"162","author":"Li","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.neucom.2026.133399_bib0175","series-title":"Computer Vision \u2013 ECCV 2024, Vol. 15105 of Lecture Notes in Computer Science","first-page":"143","article-title":"ClearCLIP: decomposing CLIP representations for dense vision-language inference","author":"Lan","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0180","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"29968","article-title":"ResCLIP: residual attention for training-free dense vision-language inference","author":"Yang","year":"2025"},{"key":"10.1016\/j.neucom.2026.133399_bib0185","author":"Bai"},{"key":"10.1016\/j.neucom.2026.133399_bib0190","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","first-page":"5061","article-title":"Pay attention to your neighbours: training-free open-vocabulary semantic segmentation","author":"Hajimiri","year":"2025"},{"key":"10.1016\/j.neucom.2026.133399_bib0195","series-title":"Computer Vision \u2013 ECCV 2024","first-page":"70","article-title":"ProxyCLIP: proxy attention improves CLIP for open-vocabulary segmentation","author":"Lan","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0200","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"15033","article-title":"Distilling spectral graph for object-context aware open-vocabulary semantic segmentation","author":"Kim","year":"2025"},{"key":"10.1016\/j.neucom.2026.133399_bib0205","series-title":"Computer Vision \u2013 ECCV 2022","first-page":"493","article-title":"Tip-adapter: training-free CLIP-adapter for better vision-language modeling","author":"Zhang","year":"2022"},{"key":"10.1016\/j.neucom.2026.133399_bib0210","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"28718","article-title":"Dual memory networks: a versatile adaptation approach for vision-language models","author":"Zhang","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0215","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"768","article-title":"CALIP: zero-shot enhancement of CLIP with parameter-free attention","volume":"vol. 37","author":"Guo","year":"2023"},{"key":"10.1016\/j.neucom.2026.133399_bib0220","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"5039","article-title":"Text and image are mutually beneficial: enhancing training-free few-shot classification with CLIP","volume":"vol. 39","author":"Li","year":"2025"},{"key":"10.1016\/j.neucom.2026.133399_bib0225","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"On the test-time zero-shot generalization of vision-language models: do we really need prompt learning?","author":"Zanella","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0230","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","article-title":"Transductive zero-shot and few-shot CLIP","author":"Martin","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0235","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"883","article-title":"Visual place recognition with repetitive structures","author":"Torii","year":"2013"},{"key":"10.1016\/j.neucom.2026.133399_bib0240","series-title":"2017 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"3223","article-title":"Deep learning features at scale for visual place recognition","author":"Chen","year":"2017"},{"key":"10.1016\/j.neucom.2026.133399_bib0245","series-title":"2022 26th International Conference on Pattern Recognition (ICPR)","first-page":"2749","article-title":"AmsterTime: a visual place recognition benchmark dataset for severe domain shift","author":"Yildiz","year":"2022"},{"key":"10.1016\/j.neucom.2026.133399_bib0250","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","first-page":"2918","article-title":"Adaptive-attentive geolocalization from few queries: a hybrid approach","author":"Berton","year":"2021"},{"key":"10.1016\/j.neucom.2026.133399_bib0255","series-title":"Proceedings of Workshop on Long-Term Autonomy, IEEE International Conference on Robotics and Automation (ICRA)","article-title":"Are we there yet? Challenging SeqSLAM on a 3000 km journey across all four seasons","author":"S\u00fcnderhauf","year":"2013"},{"issue":"9","key":"10.1016\/j.neucom.2026.133399_bib0260","doi-asserted-by":"crossref","first-page":"1100","DOI":"10.1177\/0278364910385483","article-title":"Appearance-only SLAM at large scale with FAB-MAP 2.0","volume":"30","author":"Cummins","year":"2011","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.neucom.2026.133399_bib0265","unstructured":"S. Zhao, D. Singh, H. Sun, R. Jiang, Y. Gao, T. Wu, J. Karhade, C. Whittaker, I. Higgins, J. Xu, Y. Qiu, S. Saha, C. Wang, W. Wang, S.A. Scherer, SubT-MRS: a subterranean, multi-robot, multi-spectral and multi-degraded dataset for robust SLAM, ArXiv abs\/2307.07607, 2023, https:\/\/api.semanticscholar.org\/CorpusID:271720569"},{"key":"10.1016\/j.neucom.2026.133399_bib0270","unstructured":"M. Schleiss, F. Rouatbi, D. Cremers, VPAIR - aerial visual place recognition and localization in large-scale outdoor environments, ArXiv abs\/2205.11567, 2022, https:\/\/api.semanticscholar.org\/CorpusID:249017910"},{"issue":"9","key":"10.1016\/j.neucom.2026.133399_bib0275","doi-asserted-by":"crossref","first-page":"689","DOI":"10.1177\/02783649231177322","article-title":"Eiffel Tower: a deep-sea underwater dataset for long-term visual localization","volume":"42","author":"Boittiaux","year":"2023","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.neucom.2026.133399_bib0280","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5641","article-title":"A dataset for benchmarking image-based localization","author":"Sun","year":"2017"},{"key":"10.1016\/j.neucom.2026.133399_bib0285","series-title":"2015 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"4297","article-title":"On the performance of ConvNet features for place recognition","author":"Sunderhauf","year":"2015"},{"key":"10.1016\/j.neucom.2026.133399_bib0290","series-title":"2016 13th Conference on Computer and Robot Vision (CRV)","first-page":"53","article-title":"Indoor place recognition system for localization of mobile robots","author":"Sahdev","year":"2016"},{"issue":"1","key":"10.1016\/j.neucom.2026.133399_bib0295","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1177\/0278364916679498","article-title":"1 year, 1000 km: the Oxford Robotcar dataset","volume":"36","author":"Maddern","year":"2016","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.neucom.2026.133399_bib0300","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"17794","article-title":"BoQ: a place is worth a bag of learnable queries","author":"Ali-Bey","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0305","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"17658","article-title":"Optimal transport aggregation for visual place recognition","author":"Izquierdo","year":"2024"},{"key":"10.1016\/j.neucom.2026.133399_bib0310","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.132539","article-title":"SciceVPR: stable cross-image correlation enhanced model for visual place recognition","volume":"669","author":"Wan","year":"2026","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133399_bib0315","series-title":"Computer Vision \u2013 ECCV 2024, Vol. 15079 of Lecture Notes in Computer Science","first-page":"315","article-title":"SCLIP: rethinking self-attention for dense vision-language inference","author":"Wang","year":"2024"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007964?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231226007964?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T20:30:17Z","timestamp":1776976217000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231226007964"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":63,"alternative-id":["S0925231226007964"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133399","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"TF-VPR: A novel benchmark for training-free visual place recognition","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133399","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"133399"}}