{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T21:24:28Z","timestamp":1776374668873,"version":"3.51.2"},"reference-count":121,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1813202"],"award-info":[{"award-number":["U1813202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61773093"],"award-info":[{"award-number":["61773093"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Basic Research Program of China","doi-asserted-by":"publisher","award":["2018YFC0831800"],"award-info":[{"award-number":["2018YFC0831800"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004829","name":"Research Programs of Sichuan Science and Technology Department","doi-asserted-by":"publisher","award":["17ZDYF3184"],"award-info":[{"award-number":["17ZDYF3184"]}],"id":[{"id":"10.13039\/501100004829","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Important Science and Technology Innovation Projects in Chengdu","award":["2018-YF08-00039-GX"],"award-info":[{"award-number":["2018-YF08-00039-GX"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/access.2020.3011438","type":"journal-article","created":{"date-parts":[[2020,7,23]],"date-time":"2020-07-23T20:44:32Z","timestamp":1595537072000},"page":"135426-135442","source":"Crossref","is-referenced-by-count":85,"title":["A Survey on Visual Navigation for Artificial Agents With Deep Reinforcement Learning"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4883-5096","authenticated-orcid":false,"given":"Fanyu","family":"Zeng","sequence":"first","affiliation":[]},{"given":"Chen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Shuzhi Sam","family":"Ge","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"ref33","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref32","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref37","article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","author":"howard","year":"2017","journal-title":"arXiv 1704 04861"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2012.6315022"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref29","first-page":"696","article-title":"Learning representations by back-propagating errors","volume":"323","author":"rumelhart","year":"1988","journal-title":"Nature"},{"key":"ref20","article-title":"Deep reinforcement learning: An overview","author":"li","year":"2017","journal-title":"arXiv 1701 07274"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref21","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref24","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref101","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1109\/TCDS.2018.2875309","article-title":"Episodic memory multimodal learning for robot sensorimotor map building and navigation","volume":"11","author":"chin","year":"2019","journal-title":"IEEE Trans Cogn Develop Syst"},{"key":"ref26","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2017.2776965"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00649"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00152"},{"key":"ref59","article-title":"Reinforcement learning with unsupervised auxiliary tasks","author":"jaderberg","year":"2016","journal-title":"arXiv 1611 05397"},{"key":"ref58","first-page":"1","article-title":"Learning to navigate in complex environments","author":"mirowski","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref57","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref56","first-page":"1","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/j.tins.2010.01.006"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.102.3.419"},{"key":"ref53","article-title":"Connecting generative adversarial networks and actor-critic methods","author":"pfau","year":"2016","journal-title":"arXiv 1610 01945"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.10.012"},{"key":"ref40","article-title":"Unsupervised representation learning with deep convolutional generative adversarial networks","author":"radford","year":"2015","journal-title":"arXiv 1511 06434"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2016.2624754"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2004.842368"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2007.4538852"},{"key":"ref5","author":"hartley","year":"2003","journal-title":"Multiple View Geometry in Computer Vision"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10605-2_54"},{"key":"ref49","first-page":"1","article-title":"Unrolled generative adversarial networks","author":"metz","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2463671"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2658577"},{"key":"ref46","first-page":"1558","article-title":"Autoencoding beyond pixels using a learned similarity metric","author":"larsen","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref45","first-page":"1","article-title":"Adversarial feature learning","author":"donahue","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref48","first-page":"1","article-title":"Energy-based generative adversarial network","author":"zhao","year":"2016","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref47","first-page":"271","article-title":"f-GAN: Training generative neural samplers using variational divergence minimization","author":"nowozin","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref42","first-page":"2172","article-title":"InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets","author":"chen","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref41","article-title":"Conditional generative adversarial nets","author":"mirza","year":"2014","journal-title":"arXiv 1411 1784"},{"key":"ref44","first-page":"1","article-title":"Mode regularized generative adversarial networks","author":"che","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref73","first-page":"3675","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","author":"kulkarni","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022140919877"},{"key":"ref71","first-page":"1","article-title":"Gated path planning networks","author":"lee","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref70","first-page":"1","article-title":"Teaching a machine to read maps with deep reinforcement learning","author":"brunner","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref76","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref77","first-page":"1","article-title":"Near-optimal representation learning for hierarchical reinforcement learning","author":"nachum","year":"2019","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref74","first-page":"1","article-title":"A deep hierarchical approach to lifelong learning in minecraft","author":"tessler","year":"2017","journal-title":"Proc 31st AAAI Conf Artif Intell"},{"key":"ref75","first-page":"2661","article-title":"Zero-shot task generalization with multi-task deep reinforcement learning","volume":"70","author":"oh","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref79","first-page":"1","article-title":"The option-critic architecture","author":"bacon","year":"2017","journal-title":"Proc 31st AAAI Conf Artif Intell"},{"key":"ref60","article-title":"DeepMind lab","author":"beattie","year":"2016","journal-title":"arXiv 1612 03801"},{"key":"ref62","article-title":"Recurrent reinforcement learning: A hybrid approach","author":"li","year":"2015","journal-title":"arXiv 1509 03044"},{"key":"ref61","article-title":"Memory approaches to reinforcement learning in non-Markovian domains","author":"lin","year":"1992"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-018-0102-6"},{"key":"ref65","first-page":"1","article-title":"Active neural localization","author":"chaplot","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2932575"},{"key":"ref67","article-title":"Playing doom with SLAM-augmented deep reinforcement learning","author":"bhatti","year":"2016","journal-title":"arXiv 1612 00380"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.769"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/70.880813"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/700"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2010.12.003"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref95","first-page":"1","article-title":"One-shot reinforcement learning for robot navigation with interactive replay","author":"bruce","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref108","first-page":"2422","article-title":"Mind&#x2019;s eye: A recurrent visual representation for image caption generation","author":"chen","year":"2015","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit (CVPR)"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1038\/nature20101"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"ref93","article-title":"Neural turing machines","author":"graves","year":"2014","journal-title":"arXiv 1410 5401"},{"key":"ref106","article-title":"Neural SLAM: Learning to explore with external memory","author":"zhang","year":"2017","journal-title":"arXiv 1706 09520"},{"key":"ref92","first-page":"2827","article-title":"Neural episodic control","volume":"70","author":"pritzel","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref105","first-page":"1","article-title":"Memory augmented control networks","author":"khan","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref91","article-title":"Memory networks","author":"weston","year":"2014","journal-title":"arXiv 1410 3916"},{"key":"ref104","first-page":"1","article-title":"Neural map: Structured memory for deep reinforcement learning","author":"parisotto","year":"2017","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref90","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"chung","year":"2014","journal-title":"arXiv 1412 3555"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860433"},{"key":"ref102","first-page":"1","article-title":"Episodic curiosity through reachability","author":"savinov","year":"2019","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00387"},{"key":"ref112","first-page":"3314","article-title":"Speaker-follower models for vision-and-language navigation","author":"fried","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.501"},{"key":"ref98","first-page":"889","article-title":"Hippocampal contributions to control: The third way","author":"lengyel","year":"2008","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-psych-122414-033625"},{"key":"ref96","first-page":"15246","article-title":"Search on the replay buffer: Bridging planning and reinforcement learning","author":"eysenbach","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref97","first-page":"1","article-title":"Control of memory, active perception, and action in minecraft","author":"oh","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref10","article-title":"From seeing to moving: A survey on learning for visual indoor navigation (VIN)","author":"ye","year":"2020","journal-title":"arXiv 2002 11310"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref14","first-page":"1","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref15","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref118","first-page":"1","article-title":"Meta-learning and universality: Deep representations and gradient descent can approximate any learning algorithm","author":"finn","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref16","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2805379"},{"key":"ref117","first-page":"9516","article-title":"Probabilistic model-agnostic meta-learning","author":"finn","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793604"},{"key":"ref81","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume":"70","author":"vezhnevets","year":"2017","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref18","article-title":"From seeing to moving: A survey on learning for visual indoor navigation (VIN)","author":"ye","year":"2020","journal-title":"arXiv 2002 11310"},{"key":"ref84","first-page":"1","article-title":"Knowledge transfer for deep reinforcement learning with hierarchical experience replay","author":"yin","year":"2017","journal-title":"Proc 31st AAAI Conf Artif Intell"},{"key":"ref119","first-page":"1","article-title":"Continuous adaptation via meta-learning in nonstationary and competitive environments","author":"al-shedivat","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref83","first-page":"1","article-title":"Policy distillation","author":"rusu","year":"2015","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00750"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00679"},{"key":"ref116","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume":"70","author":"finn","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015175"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2965857"},{"key":"ref120","first-page":"4603","article-title":"A generalized model for multimodal perception","author":"shiang","year":"2017","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2994027"},{"key":"ref85","first-page":"4496","article-title":"Distral: Robust multitask reinforcement learning","author":"teh","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref86","article-title":"Progressive neural networks","author":"rusu","year":"2016","journal-title":"arXiv 1606 04671"},{"key":"ref87","first-page":"2419","article-title":"Learning to navigate in cities without a map","author":"mirowski","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref88","first-page":"1312","article-title":"Universal value function approximators","author":"schaul","year":"2015","journal-title":"Proc Int Conf Mach Learn"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8948470\/09146614.pdf?arnumber=9146614","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T15:56:38Z","timestamp":1642002998000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9146614\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":121,"URL":"https:\/\/doi.org\/10.1109\/access.2020.3011438","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}