{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T04:08:24Z","timestamp":1748750904251,"version":"3.41.0"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62192751"],"award-info":[{"award-number":["62192751"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key R&amp;D Project of China","award":["2017YFC0704100"],"award-info":[{"award-number":["2017YFC0704100"]}]},{"name":"111 International Collaboration Program of China","award":["B25027"],"award-info":[{"award-number":["B25027"]}]},{"name":"BNRist Program","award":["BNR2019TD01009"],"award-info":[{"award-number":["BNR2019TD01009"]}]},{"name":"National Innovation Center of High Speed Train R&amp;D Project","award":["CX\/KJ-2020-0006"],"award-info":[{"award-number":["CX\/KJ-2020-0006"]}]},{"name":"InnoHK Initiative, The Government of HKSAR"},{"name":"Laboratory for AI-Powered Financial Technologies"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/lra.2025.3572822","type":"journal-article","created":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T18:10:39Z","timestamp":1747937439000},"page":"6896-6903","source":"Crossref","is-referenced-by-count":0,"title":["Maximum Next-State Entropy for Efficient Reinforcement Learning"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3262-4905","authenticated-orcid":false,"given":"Dianyu","family":"Zhong","sequence":"first","affiliation":[{"name":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8748-1964","authenticated-orcid":false,"given":"Yiqin","family":"Yang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3839-7747","authenticated-orcid":false,"given":"Ziyou","family":"Zhang","sequence":"additional","affiliation":[{"name":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3081-6038","authenticated-orcid":false,"given":"Yuhua","family":"Jiang","sequence":"additional","affiliation":[{"name":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1111-1529","authenticated-orcid":false,"given":"Bo","family":"Xu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7952-5621","authenticated-orcid":false,"given":"Qianchuan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Reinforcement learning from imperfect demonstrations","year":"2018","author":"Gao","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2970945"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460756"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3005126"},{"key":"ref5","first-page":"1255","article-title":"Modeling interaction via the principle of maximum causal entropy","volume-title":"Proc. 27th Int. Conf. Int. Conf. Mach. Learn.","author":"Ziebart","year":"2010"},{"key":"ref6","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2017"},{"key":"ref7","first-page":"202","article-title":"Taming the noise in reinforcement learning via soft updates","volume-title":"Proc. 32nd Conf. Uncertainty Artif. Intell.","author":"Fox","year":"2016"},{"key":"ref8","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"article-title":"Soft actor-critic algorithms and applications","year":"2018","author":"Haarnoja","key":"ref9"},{"key":"ref10","first-page":"151","article-title":"Understanding the impact of entropy on policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ahmed","year":"2019"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2018.06.010"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(01)00281-3"},{"key":"ref13","first-page":"1","article-title":"Exploration by random network distillation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Burda","year":"2018"},{"key":"ref14","first-page":"22594","article-title":"Flipping coins to estimate pseudocounts for exploration in reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lobel","year":"2023"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5955"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref17","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Yu","year":"2020"},{"key":"ref18","first-page":"3566","article-title":"Learn what not to learn: Action elimination with deep reinforcement learning","volume-title":"Proc. 32nd Int. Conf. Neural Inform. Process. Syst.","author":"Zahavy","year":"2018"},{"key":"ref19","first-page":"6196","article-title":"The natural language of actions","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tennenholtz","year":"2019"},{"key":"ref20","first-page":"941","article-title":"Learning action representations for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chandak","year":"2019"},{"article-title":"Go-Explore: A new approach for hard-exploration problems","year":"2019","author":"Ecoffet","key":"ref21"},{"key":"ref22","first-page":"1479","article-title":"Unifying count-based exploration and intrinsic motivation","volume-title":"Proc. 30th Int. Conf. Neural Inform. Process. Syst.","author":"Bellemare","year":"2016"},{"key":"ref23","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pathak","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref25","article-title":"Never give up: Learning directed exploration strategies","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Badia","year":"2020"},{"key":"ref26","first-page":"25217","article-title":"Noveld: A simple yet effective exploration criterion","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zhang","year":"2021"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1080\/09540099108946587"},{"key":"ref28","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","volume":"48","author":"Mnih","year":"2016"},{"key":"ref29","article-title":"Training agent for first-person shooter game with actor-critic curriculum learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wu","year":"2017"},{"key":"ref30","first-page":"376","article-title":"Action redundancy in reinforcement learning","volume-title":"Proc. Uncertainty Artif. Intell.","author":"Baram","year":"2021"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i15.29652"},{"key":"ref32","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Silver","year":"2014"},{"key":"ref33","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Representations","author":"Lillicrap","year":"2016"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"article-title":"D4RL: Datasets for deep data-driven reinforcement learning","year":"2020","author":"Fu","key":"ref35"},{"key":"ref36","article-title":"Isaac gym: High performance GPU-based physics simulation for robot learning","volume-title":"Proc. Neural Inf. Process. Syst. Track Datasets Benchmarks","volume":"1","author":"Makoviychuk","year":"2021"},{"key":"ref37","first-page":"91","article-title":"Learning to walk in minutes using massively parallel deep reinforcement learning","volume-title":"Proc. Conf. Robot","author":"Rudin","year":"2022"},{"article-title":"Rl baselines3 Zoo","year":"2020","author":"Raffin","key":"ref38"},{"issue":"268","key":"ref39","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"J. Mach. Learn. Res."}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11008675\/11010856.pdf?arnumber=11010856","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T04:56:11Z","timestamp":1748667371000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11010856\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":39,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3572822","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"type":"electronic","value":"2377-3766"},{"type":"electronic","value":"2377-3774"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}