{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:40:26Z","timestamp":1765546826026,"version":"3.37.3"},"reference-count":53,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002367","name":"Chinese Academy of Sciences","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9561740","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:28:35Z","timestamp":1634689715000},"page":"6422-6428","source":"Crossref","is-referenced-by-count":1,"title":["DIMSAN: Fast Exploration with the Synergy between Density-based Intrinsic Motivation and Self-adaptive Action Noise"],"prefix":"10.1109","author":[{"given":"Jiayi","family":"Li","sequence":"first","affiliation":[{"name":"University of Chinese Academy of Sciences,School of Artificial Intelligence,Beijing,China"}]},{"given":"Boyao","family":"Li","sequence":"additional","affiliation":[{"name":"China Academy of Launch Vehicle Technology,Research and Development Department,Beijing,China"}]},{"given":"Tao","family":"Lu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,State Key Laboratory of Management and Control for Complex Systems,Beijing,China"}]},{"given":"Ning","family":"Lu","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences,School of Artificial Intelligence,Beijing,China"}]},{"given":"Yinghao","family":"Cai","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,State Key Laboratory of Management and Control for Complex Systems,Beijing,China"}]},{"given":"Shuo","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences,School of Artificial Intelligence,Beijing,China"}]}],"member":"263","reference":[{"key":"ref39","first-page":"1109","article-title":"Vime: Variational information maximizing exploration","author":"houthooft","year":"2016","journal-title":"Neural Information Processing Systems"},{"key":"ref38","first-page":"2753","article-title":"#Exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Neural Information Processing Systems"},{"article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref33"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"key":"ref31","first-page":"2137","article-title":"Provably efficient reinforcement learning with linear function approximation","author":"jin","year":"2020","journal-title":"Conference on Learning Theory"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"article-title":"Noisy networks for exploration","year":"2017","author":"fortunato","key":"ref37"},{"article-title":"Parameter space noise for exploration","year":"2017","author":"plappert","key":"ref36"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87481-2_16"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207681"},{"key":"ref28","first-page":"12623","article-title":"Curriculum-guided hindsight experience replay","author":"fang","year":"2019","journal-title":"Neural Information Processing Systems"},{"key":"ref27","article-title":"DHER: Hindsight experience replay for dynamic goals","author":"fang","year":"2018","journal-title":"Conference on Robot Learning"},{"article-title":"Generalized hindsight for reinforcement learning","year":"2020","author":"li","key":"ref29"},{"key":"ref2","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"article-title":"Count-based exploration with neural density models","year":"2017","author":"ostrovski","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.890271"},{"article-title":"Curiosity-driven experience prioritization via density estimation","year":"2019","author":"zhao","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","year":"2015","author":"stadie","key":"ref23"},{"key":"ref26","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Neural Information Processing Systems"},{"article-title":"Large-scale study of curiosity-driven learning","year":"2018","author":"burda","key":"ref25"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO49542.2019.8961529"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","year":"2018","author":"plappert","key":"ref53"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref52"},{"key":"ref10","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels. International Conference on Machine Learning","author":"hafner","year":"2019","journal-title":"PMLR"},{"key":"ref11","first-page":"5690","article-title":"Imagination-Augmented Agents for Deep Reinforcement Learning","author":"racani\u00e8re","year":"2017","journal-title":"Neural Information Processing Systems"},{"article-title":"Self-supervised exploration via disagreement","year":"2019","author":"pathak","key":"ref40"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref12"},{"journal-title":"A review of robot learning for manipulation Challenges representations and algorithms","year":"2019","author":"kroemer","key":"ref13"},{"article-title":"Learning by playing-solving sparse reward tasks from scratch","year":"2018","author":"riedmiller","key":"ref14"},{"key":"ref15","first-page":"361","article-title":"Planning by prioritized sweeping with small backups","author":"van seijen","year":"2013","journal-title":"International Conference on Machine Learning"},{"article-title":"Prioritized experience replay","year":"2015","author":"schaul","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"ref18","first-page":"4026","article-title":"Deep exploration via bootstrapped DQN","author":"osband","year":"2016","journal-title":"Neural Information Processing Systems"},{"key":"ref19","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"2016","journal-title":"Neural Information Processing Systems"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref4"},{"article-title":"High-dimensional continuous control using generalized advantage estimation","year":"2015","author":"schulman","key":"ref3"},{"article-title":"Qt-opt: Scalable deep reinforcement learning for vision-based robotic manipulation","year":"2018","author":"kalashnikov","key":"ref6"},{"article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","year":"2018","author":"haarnoja","key":"ref5"},{"article-title":"Action-conditional video prediction using deep networks in atari games","year":"2015","author":"oh","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917710318"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177728190"},{"article-title":"Dynamic planning networks","year":"2018","author":"tasfi","key":"ref9"},{"article-title":"Skew-fit: State-covering self-supervised reinforcement learning","year":"2019","author":"pong","key":"ref46"},{"article-title":"Visual reinforcement learning with imagined goals","year":"2018","author":"nair","key":"ref45"},{"key":"ref48","first-page":"1312","article-title":"Universal value function approximators","author":"schaul","year":"2015","journal-title":"International Conference on Machine Learning"},{"article-title":"Curiosity-driven multi-criteria hindsight experience replay","year":"2019","author":"lanier","key":"ref47"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"article-title":"Competitive experience replay","year":"2019","author":"liu","key":"ref41"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197421"},{"article-title":"Energy-based hindsight experience prioritization","year":"2018","author":"zhao","key":"ref43"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2021,5,30]]},"location":"Xi'an, China","end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09561740.pdf?arnumber=9561740","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T23:21:12Z","timestamp":1659482472000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9561740\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9561740","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}