{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T18:40:09Z","timestamp":1746470409059,"version":"3.40.4"},"reference-count":66,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076080"],"award-info":[{"award-number":["62076080"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005230","name":"Natural Science Foundation of ChongQing","doi-asserted-by":"publisher","award":["CSTB2022NSCQ-MSX0922"],"award-info":[{"award-number":["CSTB2022NSCQ-MSX0922"]}],"id":[{"id":"10.13039\/501100005230","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1109\/tnnls.2024.3428323","type":"journal-article","created":{"date-parts":[[2024,9,20]],"date-time":"2024-09-20T17:25:43Z","timestamp":1726853143000},"page":"9238-9252","source":"Crossref","is-referenced-by-count":0,"title":["Progressively Learning to Reach Remote Goals by Continuously Updating Boundary Goals"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5917-1803","authenticated-orcid":false,"given":"Mengxuan","family":"Shao","sequence":"first","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5076-2412","authenticated-orcid":false,"given":"Haiqi","family":"Zhu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3434-9967","authenticated-orcid":false,"given":"Debin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"}]},{"given":"Kun","family":"Han","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8342-1211","authenticated-orcid":false,"given":"Feng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1810-5412","authenticated-orcid":false,"given":"Shaohui","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6989-766X","authenticated-orcid":false,"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref3","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"arXiv:1312.5602"},{"key":"ref4","first-page":"20","article-title":"AlphaStar: Mastering the real-time strategy game StarCraft II","volume":"2","author":"Vinyals","year":"2019","journal-title":"DeepMind Blog"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33011206"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3029475"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220122"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561384"},{"key":"ref11","first-page":"5055","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"30","author":"Andrychowicz"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3107202"},{"key":"ref13","first-page":"12623","article-title":"Curriculum-guided hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"32","author":"Fang"},{"key":"ref14","first-page":"1","article-title":"Reinforcement learning with sparse rewards using guidance from offline demonstration","volume-title":"Proc. 10th Int. Conf. Learn. Represent. (ICLR)","author":"Rengarajan"},{"key":"ref15","first-page":"1","article-title":"Exploration via hindsight goal generation","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"32","author":"Ren"},{"key":"ref16","first-page":"1025","article-title":"Relay policy learning: Solving long-horizon tasks via imitation and reinforcement learning","volume-title":"Proc. 3rd Conf. Robot Learn. (CoRL)","volume":"100","author":"Gupta"},{"key":"ref17","first-page":"188","article-title":"Accelerating reinforcement learning with learned skill priors","volume-title":"Proc. 4th Conf. Robot Learn. (CoRL)","volume":"155","author":"Pertsch"},{"key":"ref18","first-page":"1","article-title":"Wish you were here: Hindsight goal selection for long-horizon dexterous manipulation","volume-title":"Proc. 10th Int. Conf. Learn. Represent. (ICLR)","author":"Davchev"},{"key":"ref19","article-title":"Exploration by random network distillation","author":"Burda","year":"2018","journal-title":"arXiv:1810.12894"},{"key":"ref20","first-page":"1","article-title":"On bonus based exploration methods in the arcade learning environment","volume-title":"Proc. 8th Int. Conf. Learn. Represent. (ICLR)","author":"Taiga"},{"key":"ref21","article-title":"The Alberta plan for AI research","author":"Sutton","year":"2022","journal-title":"arXiv:2208.11173"},{"key":"ref22","first-page":"1","article-title":"Learning long-term reward redistribution via randomized return decomposition","volume-title":"Proc. 10th Int. Conf. Learn. Represent. (ICLR)","author":"Ren"},{"key":"ref23","first-page":"7783","article-title":"Skew-fit: State-covering self-supervised reinforcement learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn. (ICML)","volume":"119","author":"Pong"},{"key":"ref24","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schaul"},{"key":"ref25","first-page":"1","article-title":"Hindsight policy gradients","volume-title":"Proc. 7th Int. Conf. Learn. Represent. (ICLR)","author":"Rauber"},{"key":"ref26","first-page":"1","article-title":"DHER: Hindsight experience replay for dynamic goals","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Fang"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2023.3296765"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3088947"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3124366"},{"key":"ref30","first-page":"1","article-title":"Learning to reach goals via iterated supervised learning","volume-title":"Proc. 9th Int. Conf. Learn. Represent. (ICLR)","author":"Ghosh"},{"key":"ref31","first-page":"1","article-title":"Rethinking goal-conditioned supervised learning and its connection to offline RL","volume-title":"Proc. 10th Int. Conf. Learn. Represent. (ICLR)","author":"Yang"},{"key":"ref32","first-page":"577","article-title":"Principled exploration via optimistic bootstrapping and backward induction","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Bai"},{"key":"ref33","first-page":"4607","article-title":"Randomized exploration in reinforcement learning with general value function approximation","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Ishfaq"},{"key":"ref34","first-page":"1","article-title":"Contingency-aware exploration in reinforcement learning","volume-title":"Proc. 7th Int. Conf. Learn. Represent. (ICLR)","author":"Choi"},{"key":"ref35","first-page":"1","article-title":"Optimistic exploration even with a pessimistic initialisation","volume-title":"Proc. 8th Int. Conf. Learn. Represent. (ICLR)","author":"Rashid"},{"key":"ref36","first-page":"1479","article-title":"Unifying count-based exploration and intrinsic motivation","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"29","author":"Bellemare"},{"key":"ref37","first-page":"2721","article-title":"Count-based exploration with neural density models","volume-title":"Proc. 34th Int. Conf. Mach. Learn. (ICML)","volume":"70","author":"Ostrovski"},{"key":"ref38","first-page":"2754","article-title":"#exploration: A study of count-based exploration for deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"30","author":"Tang"},{"key":"ref39","first-page":"25377","article-title":"Semantic exploration from language abstractions and pretrained representations","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"35","author":"Tam"},{"key":"ref40","first-page":"33947","article-title":"Improving intrinsic exploration with language abstractions","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"35","author":"Mu"},{"key":"ref41","first-page":"8657","article-title":"Guiding pretraining in reinforcement learning with large language models","volume-title":"Proc. 40th Int. Conf. Mach. Learn. (ICML)","volume":"202","author":"Du"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/3115.003.0030"},{"key":"ref43","first-page":"25217","article-title":"Noveld: A simple yet effective exploration criterion","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"34","author":"Zhang"},{"key":"ref44","first-page":"11920","article-title":"Reinforcement learning with prototypical representations","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Yarats"},{"key":"ref45","first-page":"1","article-title":"Dora the explorer: Directed outreaching reinforcement action-selection","volume-title":"Proc. 6th Int. Conf. Learn. Represent. (ICLR)","author":"Fox"},{"key":"ref46","first-page":"1","article-title":"Learning to perform physics experiments via deep reinforcement learning","volume-title":"Proc. 5th Int. Conf. Learn. Represent. (ICLR)","author":"Denil"},{"key":"ref47","first-page":"1","article-title":"Boosted curriculum reinforcement learning","volume-title":"Proc. 10th Int. Conf. Learn. Represent. (ICLR)","author":"Klink"},{"issue":"1","key":"ref48","first-page":"1","article-title":"Curriculum learning for reinforcement learning domains: A framework and survey","volume":"21","author":"Narvekar","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref49","first-page":"9216","article-title":"Self-paced deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"33","author":"Klink"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/353"},{"key":"ref51","first-page":"1515","article-title":"Automatic goal generation for reinforcement learning agents","volume-title":"Proc. 35th Int. Conf. Mach. Learn. (ICML)","volume":"80","author":"Florensa"},{"key":"ref52","first-page":"2660","article-title":"ELF: An extensive, lightweight and flexible research platform for real-time strategy games","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"30","author":"Tian"},{"key":"ref53","first-page":"1","article-title":"Training agent for first-person shooter game with actor-critic curriculum learning","volume-title":"Proc. 5th Int. Conf. Learn. Represent. (ICLR)","author":"Wu"},{"key":"ref54","first-page":"1","article-title":"Learning to multi-task by active sampling","volume-title":"Proc. 6th Int. Conf. Learn. Represent. (ICLR)","author":"Sharma"},{"key":"ref55","first-page":"1","article-title":"Minimax curriculum learning: Machine teaching with desirable difficulties and scheduled diversity","volume-title":"Proc. 6th Int. Conf. Learn. Represent. (ICLR)","author":"Zhou"},{"key":"ref56","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"Plappert","year":"2018","journal-title":"arXiv:1802.09464"},{"key":"ref57","first-page":"14783","article-title":"Rewriting history with inverse rl: Hindsight inference for policy improvement","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NIPS)","volume":"33","author":"Eysenbach"},{"key":"ref58","first-page":"1259","article-title":"A divergence minimization perspective on imitation learning methods","volume-title":"Proc. 3rd Conf. Robot Learn. (CoRL)","volume":"100","author":"Ghasemipour"},{"key":"ref59","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019","journal-title":"arXiv:1911.11361"},{"key":"ref60","first-page":"214","article-title":"Wasserstein generative adversarial networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn. (ICML)","volume":"70","author":"Arjovsky"},{"key":"ref61","first-page":"4446","article-title":"A Riemannian block coordinate descent method for computing the projection robust Wasserstein distance","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Huang"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3014137"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref64","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160985"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/10982361\/10684786.pdf?arnumber=10684786","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T17:58:27Z","timestamp":1746467907000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10684786\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5]]},"references-count":66,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2024.3428323","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2025,5]]}}}