{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:05:20Z","timestamp":1755907520113,"version":"3.44.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,8]]},"DOI":"10.23919\/acc63710.2025.11107911","type":"proceedings-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:17:51Z","timestamp":1755800271000},"page":"3492-3499","source":"Crossref","is-referenced-by-count":0,"title":["Boosting Exploration in Reinforcement Learning for Sparse Reward Tasks"],"prefix":"10.23919","author":[{"given":"Yuhang","family":"Zhang","sequence":"first","affiliation":[{"name":"Tsinghua University,State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility,Beijing,China"}]},{"given":"Yao","family":"Lyu","sequence":"additional","affiliation":[{"name":"Tsinghua University,State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility,Beijing,China"}]},{"given":"Guojian","family":"Zhan","sequence":"additional","affiliation":[{"name":"Tsinghua University,State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility,Beijing,China"}]},{"given":"Wenjun","family":"Zou","sequence":"additional","affiliation":[{"name":"Tsinghua University,State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility,Beijing,China"}]},{"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University,State Key Laboratory of Intelligent Green Vehicle and Mobility, School of Vehicle and Mobility,Beijing,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.3026111"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-7784-8"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.003"},{"article-title":"Learning by playing solving sparse reward tasks from scratch","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"Riedmiller","key":"ref5"},{"article-title":"Learning to generalize from sparse and underspecified rewards","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"Agarwal","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"article-title":"Making efficient use of demonstrations to solve hard exploration problems","volume-title":"8th International Conference on Learning Representations","author":"Gulcehre","key":"ref8"},{"article-title":"Playing hard exploration games by watching youtube","volume-title":"32nd Conference on Neural Information Processing Systems","author":"Aytar","key":"ref9"},{"article-title":"Learning montezuma\u2019s revenge from a single demonstration","volume-title":"32nd Conference on Neural Information Processing Systems","author":"Salimans","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03157-9"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487172"},{"article-title":"Exploration by random network distillation","volume-title":"Seventh International Conference on Learning Representations","author":"Burda","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5955"},{"article-title":"Improving exploration in evolution strategies for deep reinforcement learning via a population of novelty-seeking agents","volume-title":"Advances in neural information processing systems","author":"Conti","key":"ref15"},{"article-title":"Diversity-driven exploration strategy for deep reinforcement learning","volume-title":"Advances in neural information processing systems","author":"Hong","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3082568"},{"article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"Haarnoja","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1995.478953"},{"article-title":"The total variation distance between high-dimensional gaussians with the same mean","year":"2018","author":"Devroye","key":"ref20"},{"key":"ref21","article-title":"Randomized prior functions for deep reinforcement learning","volume":"31","author":"Osband","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2025.3537087"},{"article-title":"Openai gym","year":"2016","author":"Brockman","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"article-title":"Vime: Variational information maximizing exploration","volume-title":"Advances in neural information processing systems","author":"Houthooft","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2023.3329823"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.commtr.2023.100096"}],"event":{"name":"2025 American Control Conference (ACC)","start":{"date-parts":[[2025,7,8]]},"location":"Denver, CO, USA","end":{"date-parts":[[2025,7,10]]}},"container-title":["2025 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11107441\/11107442\/11107911.pdf?arnumber=11107911","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:47:06Z","timestamp":1755841626000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11107911\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,8]]},"references-count":27,"URL":"https:\/\/doi.org\/10.23919\/acc63710.2025.11107911","relation":{},"subject":[],"published":{"date-parts":[[2025,7,8]]}}}