{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T06:01:47Z","timestamp":1779948107347,"version":"3.53.1"},"reference-count":78,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,20]]},"DOI":"10.1109\/3dv69130.2026.00067","type":"proceedings-article","created":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T19:40:49Z","timestamp":1779910849000},"page":"640-651","source":"Crossref","is-referenced-by-count":0,"title":["What Does Really Matter in Image Goal Navigation?"],"prefix":"10.1109","author":[{"given":"Gianluca","family":"Monaci","sequence":"first","affiliation":[{"name":"NAVER LABS Europe,Meylan,France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Philippe","family":"Weinzaepfel","sequence":"additional","affiliation":[{"name":"NAVER LABS Europe,Meylan,France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Christian","family":"Wolf","sequence":"additional","affiliation":[{"name":"NAVER LABS Europe,Meylan,France"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01652"},{"key":"ref2","author":"Anderson","year":"2018","journal-title":"On Evaluation of Embodied Navigation Agents"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19769-7_40"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00464"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_28"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-67661-2_31"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00500"},{"key":"ref8","article-title":"End-to-End (Instance)-Image Goal Navigation through Correspondence as an Emergent Phenomenon,","volume-title":"International Conference on Learning Representations ICLR","author":"Bono","year":"2024"},{"key":"ref9","article-title":"Learning with a Mole: Transferable Latent Spatial Representations for Navigation without Reconstruction","volume-title":"International Conference on Learning Representations ICLR","author":"Bono","year":"2024"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01689"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.267"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2017.2749181"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.5555\/295240.295249"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.073"},{"key":"ref16","article-title":"Object Goal Navigation using Goal-Oriented Semantic Exploration","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Singh Chaplot","year":"2020"},{"key":"ref17","article-title":"Learning To Explore Using Active Neural SLAM","volume-title":"International Conference on Learning Representations (ICLR)","author":"Singh Chaplot","year":"2020"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01289"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01540"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01604"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1179"},{"key":"ref22","article-title":"Neural modular control for embodied question answering","volume-title":"Conference on Robot Learning CoRL","author":"Das","year":"2018"},{"key":"ref23","article-title":"Goal-conditioned Imitation Learning","volume-title":"Conference on Neural Information Processing Systems NeurIPS","author":"Ding","year":"2019"},{"key":"ref24","article-title":"VTNet: Visual transformer network for object goal navigation","volume-title":"International Conference on Learning Representations ICLR","author":"Du","year":"2021"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00063"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/100.580977"},{"key":"ref27","article-title":"No RL, No Simulation: Learning to Navigate without Navigating","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Hahn","year":"2021"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1017\/cbo9780511811685"},{"key":"ref29","article-title":"MapNet: An allocentric spatial memory for mapping environments","volume-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Jo\u00e3o","year":"2018"},{"key":"ref30","article-title":"Reinforcement learning with unsupervised auxiliary tasks","volume-title":"International Conference on Learning Representations (ICLR)","author":"Jaderberg","year":"2017"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01131"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3013848"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.336"},{"key":"ref34","author":"Kim","year":"2024","journal-title":"Unsupervised-to-online reinforcement learning"},{"key":"ref35","author":"Kolve","year":"2017","journal-title":"AI2-Thor: An interactive 3D environment for visual AI"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2000.894676"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01002"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00878"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1002\/rob.21831"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73220-1_5"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461251"},{"key":"ref42","article-title":"A single goal is all you need: Skills and exploration emerge from contrastive RL without rewards, demonstrations, or subgoals","volume-title":"International Conference on Learning Representations ICLR","author":"Liu","year":"2025"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341207"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2343"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509725"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01010"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981090"},{"key":"ref48","article-title":"Learning to navigate in complex environments","volume-title":"International Conference on Learning Representations ICLR","author":"Mirowski","year":"2017"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2463671"},{"key":"ref50","article-title":"DINOv2: Learning robust visual features without supervision","author":"Oquab","year":"2024","journal-title":"Transactions on Machine Learning Research Journal (TMLR)"},{"key":"ref51","article-title":"Neural map: Structured memory for deep reinforcement learning","volume-title":"International Conference on Learning Representations ICLR","author":"Parisotto","year":"2018"},{"key":"ref52","author":"Kumar Ramakrishnan","year":"2021","journal-title":"NeurIPS Datasets and Benchmarks Track"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01832"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01716"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00566"},{"key":"ref56","article-title":"A generalist agent","author":"Reed","year":"2022","journal-title":"Transactions on Machine Learning Research Journal (TMLR)"},{"key":"ref57","article-title":"R2D2: Reliable and Repeatable Detector and Descriptor","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Revaud","year":"2019"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00144"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ECC.2015.7331052"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"key":"ref63","author":"Schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.93.4.1591"},{"key":"ref65","article-title":"ViNT: A foundation model for visual navigation","volume-title":"Conference on Robot Learning CoRL","author":"Shah","year":"2023"},{"key":"ref66","article-title":"A general purpose supervisory signal for embodied agents","volume-title":"International Conference on Computer Vision (ICCV)","author":"Pratap Singh","year":"2022"},{"key":"ref67","article-title":"Offline actor-critic reinforcement learning scales to large models","volume-title":"International Conference on Machine Learning (ICML)","author":"Tobias Springenberg","year":"2024"},{"key":"ref68","article-title":"FGPrompt: fine-grained goal prompting for image-goal navigation","volume-title":"Conference on Neural Information Processing Systems NeurIPS","author":"Sun","year":"2024"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1145\/504729.504754"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01717"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0253"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01647"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3178810"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref76","author":"Yadav","year":"2023","journal-title":"OVRL-V2: A sim-\\\\ ple State-of-art Baseline for ImageNav and ObjectNav"},{"key":"ref77","article-title":"Poliformer: Scaling on-policy RL with transformers results in masterful navigators","volume-title":"Conference on Robot Learning CoRL","author":"Zeng","year":"2024"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"}],"event":{"name":"2026 International Conference on 3D Vision (3DV)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2026,3,20]]},"end":{"date-parts":[[2026,3,23]]}},"container-title":["2026 International Conference on 3D Vision (3DV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11533157\/11533158\/11533267.pdf?arnumber=11533267","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T05:02:21Z","timestamp":1779944541000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11533267\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,20]]},"references-count":78,"URL":"https:\/\/doi.org\/10.1109\/3dv69130.2026.00067","relation":{},"subject":[],"published":{"date-parts":[[2026,3,20]]}}}