{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:11:42Z","timestamp":1775578302701,"version":"3.50.1"},"reference-count":95,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&#x0026;D Program of China","award":["2022ZD0117900"],"award-info":[{"award-number":["2022ZD0117900"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62236010"],"award-info":[{"award-number":["62236010"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276261"],"award-info":[{"award-number":["62276261"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62322607"],"award-info":[{"award-number":["62322607"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372405"],"award-info":[{"award-number":["62372405"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"CCF-Tencent Open Fund"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/tpami.2024.3386695","type":"journal-article","created":{"date-parts":[[2024,4,9]],"date-time":"2024-04-09T18:23:03Z","timestamp":1712686983000},"page":"5130-5145","source":"Crossref","is-referenced-by-count":54,"title":["ETPNav: Evolving Topological Planning for Vision-Language Navigation in Continuous Environments"],"prefix":"10.1109","volume":"47","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1347-8535","authenticated-orcid":false,"given":"Dong","family":"An","sequence":"first","affiliation":[{"name":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), School of Future Technology and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2472-3852","authenticated-orcid":false,"given":"Hanqing","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0802-9567","authenticated-orcid":false,"given":"Wenguan","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9502-050X","authenticated-orcid":false,"given":"Zun","family":"Wang","sequence":"additional","affiliation":[{"name":"Australian National University, Canberra, ACT, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8239-7229","authenticated-orcid":false,"given":"Yan","family":"Huang","sequence":"additional","affiliation":[{"name":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), School of Future Technology and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5136-8444","authenticated-orcid":false,"given":"Keji","family":"He","sequence":"additional","affiliation":[{"name":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), School of Future Technology and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5224-8647","authenticated-orcid":false,"given":"Liang","family":"Wang","sequence":"additional","affiliation":[{"name":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), School of Future Technology and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00387"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00679"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00835"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00169"},{"key":"ref5","first-page":"5834","article-title":"History aware multimodal transformer for vision-and-language navigation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58604-1_7"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.328"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01488"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01500"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_34"},{"key":"ref11","article-title":"1st place solutions for RxR-habitat vision-and-language navigation competition","author":"An","year":"2022"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.93.4.1591"},{"key":"ref13","article-title":"DD-PPO: Learning near-perfect pointgoal navigators from 2.5 billion frames","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wijmans"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01502"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.ne.11.030188.001445"},{"key":"ref16","first-page":"20660","article-title":"Evolving graphical planner: Contextual global planning for vision-and-language navigation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Deng"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01604"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01112"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3275156"},{"key":"ref21","article-title":"Retrospectives on the embodied AI workshop","author":"Deitke","year":"2022"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.356"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01282"},{"key":"ref24","first-page":"394","article-title":"Vision-and-dialog navigation","volume-title":"Proc. Conf. Robot Learn.","author":"Thomason"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1063"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01000"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01250"},{"key":"ref28","first-page":"3318","article-title":"Speaker-follower models for vision-and-language navigation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Fried"},{"key":"ref29","article-title":"Self-monitoring navigation agent via auxiliary progress estimation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ma"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_18"},{"key":"ref31","first-page":"7685","article-title":"Language and visual entity relationship graph for agent navigation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hong"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475282"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_3"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1268"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_8"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58542-6_19"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01721-6"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00167"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01497"},{"key":"ref40","first-page":"5296","article-title":"Counterfactual vision-and-language navigation: Unravelling the unseen","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Parvaneh"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01041"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01503"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01499"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.111"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52729.2023.01826"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3341828"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01315"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00166"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_16"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00168"},{"key":"ref51","first-page":"7357","article-title":"Soat: A scene-and object-aware transformer for vision-and-language navigation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Moudgil"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3234243"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_22"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548281"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00646"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01007"},{"key":"ref57","first-page":"2737","article-title":"BEVBert: Multimodal map pre-training for language-guided navigation","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"An"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01496"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.332"},{"key":"ref60","first-page":"671","article-title":"Sim-to-real transfer for vision-and-language navigation","volume-title":"Proc. Conf. Robot Learn.","author":"Anderson"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/icpr56361.2022.9956561"},{"key":"ref63","first-page":"38149","article-title":"Weakly-supervised multi-granularity map learning for vision-and-language navigation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00998"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/2.30720"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2463671"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126513"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.1999.772544"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00884"},{"key":"ref70","first-page":"13086","article-title":"SEAL: Self-supervised embodied active learning using exploration and 3D consistency","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chaplot"},{"key":"ref71","article-title":"Learning to explore using active neural SLAM","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chaplot"},{"key":"ref72","first-page":"4247","article-title":"Object goal navigation using goal-oriented semantic exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chaplot"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adf6991"},{"key":"ref74","article-title":"Film: Following instructions in language with modular methods","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Min"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01009"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/70.928558"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1016\/0921-8890(91)90014-C"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01289"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01559"},{"key":"ref80","first-page":"26661","article-title":"No RL, no simulation: Learning to navigate without navigating","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hahn"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01582"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"key":"ref85","first-page":"3287","volume-title":"Proc. IEEE\/RSJ Int. Conf. Intell. Robots Syst.","author":"Luo"},{"key":"ref86","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist. JMLR Workshop Conf. Proc.","author":"Ross"},{"key":"ref87","article-title":"On evaluation of embodied navigation agents","author":"Anderson","year":"2018"},{"key":"ref88","article-title":"General evaluation for instruction conditioned navigation using dynamic time warping","author":"Ilharco","year":"2019"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref90","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref92","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"key":"ref94","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Loshchilov"},{"key":"ref95","first-page":"1171","article-title":"Scheduled sampling for sequence prediction with recurrent neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Bengio"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/11026037\/10495141.pdf?arnumber=10495141","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,6]],"date-time":"2025-06-06T04:14:26Z","timestamp":1749183266000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10495141\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":95,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2024.3386695","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}