{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:37:54Z","timestamp":1780418274004,"version":"3.54.1"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62103403"],"award-info":[{"award-number":["62103403"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Basic Key Research Project","award":["JCYJ20200109115414354"],"award-info":[{"award-number":["JCYJ20200109115414354"]}]},{"name":"Shenzhen Science and Technology Funding","award":["KCXST20221021111210023"],"award-info":[{"award-number":["KCXST20221021111210023"]}]},{"DOI":"10.13039\/501100010877","name":"Shenzhen Science and Technology Innovation Commission","doi-asserted-by":"publisher","award":["JCYJ20210324115800002"],"award-info":[{"award-number":["JCYJ20210324115800002"]}],"id":[{"id":"10.13039\/501100010877","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002367","name":"International Partnership Program of the Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["321GJHZ2022057MI"],"award-info":[{"award-number":["321GJHZ2022057MI"]}],"id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1109\/tnnls.2023.3329513","type":"journal-article","created":{"date-parts":[[2023,11,9]],"date-time":"2023-11-09T14:00:57Z","timestamp":1699538457000},"page":"475-485","source":"Crossref","is-referenced-by-count":25,"title":["Relative Entropy Regularized Sample-Efficient Reinforcement Learning With Continuous Actions"],"prefix":"10.1109","volume":"36","author":[{"given":"Zhiwei","family":"Shang","sequence":"first","affiliation":[{"name":"Systems Hub, The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Renxing","family":"Li","sequence":"additional","affiliation":[{"name":"School of Software Engineering, University of Science and Technology of China, Hefei, Anhui, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6973-1827","authenticated-orcid":false,"given":"Chunhua","family":"Zheng","sequence":"additional","affiliation":[{"name":"CAS Key Laboratory of Human-Machine Intelligence-Synergy Systems and the Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0157-1393","authenticated-orcid":false,"given":"Huiyun","family":"Li","sequence":"additional","affiliation":[{"name":"CAS Key Laboratory of Human-Machine Intelligence-Synergy Systems and the Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5539-4260","authenticated-orcid":false,"given":"Yunduan","family":"Cui","sequence":"additional","affiliation":[{"name":"CAS Key Laboratory of Human-Machine Intelligence-Synergy Systems and the Guangdong-Hong Kong-Macao Joint Laboratory of Human-Machine Intelligence-Synergy Systems, Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref5","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Lillicrap"},{"key":"ref6","first-page":"1861","article-title":"Soft actor\u2013critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref7","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2319733"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2852711"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2884797"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2927869"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793485"},{"key":"ref14","first-page":"188","article-title":"Accelerating reinforcement learning with learned skill priors","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Pertsch"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2021.3089476"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref17","first-page":"262","article-title":"Sim-to-real robot learning from pixels with progressive nets","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Rusu"},{"key":"ref18","first-page":"1587","article-title":"Addressing function approximation error in actor\u2013critic methods","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Fujimoto"},{"key":"ref19","first-page":"1792","article-title":"Visualizing and understanding Atari agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Greydanus"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2980743"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3082568"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3215596"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"113","DOI":"10.32604\/jai.2020.010137","article-title":"Truly proximal policy optimization","author":"Wang","year":"2020","journal-title":"Uncertainty in Artificial Intelligence"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/510"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3044196"},{"issue":"1","key":"ref26","first-page":"3207","article-title":"Dynamic policy programming","volume":"13","author":"Azar","year":"2012","journal-title":"J. Mach. Learn. Res."},{"key":"ref27","first-page":"2995","article-title":"Theoretical analysis of efficiency and robustness of softmax and gap-increasing operators in reinforcement learning","volume-title":"Proc. Int. Conf. Artif. Intell. Statist. (AISTATS)","author":"Kozuno"},{"key":"ref28","first-page":"12163","article-title":"Leverage the average: An analysis of KL regularization in reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Vieillard"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2017.06.007"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2018.11.004"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2020.104331"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2022.107658"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3155483"},{"key":"ref34","first-page":"243","article-title":"An alternative softmax operator for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Asadi"},{"key":"ref35","first-page":"6287","article-title":"Boltzmann exploration done right","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Cesa-Bianchi"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63833-7_18"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7503.003.0176"},{"key":"ref38","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/365"},{"issue":"1","key":"ref41","first-page":"105","article-title":"PaddlePaddle: An open-source deep learning platform from industrial practice","volume":"1","author":"Ma","year":"2019","journal-title":"Frontiers Data Domputing"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340956"},{"issue":"11","key":"ref43","first-page":"1","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10832116\/10313993.pdf?arnumber=10313993","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:39:25Z","timestamp":1764959965000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10313993\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":43,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3329513","relation":{"has-preprint":[{"id-type":"doi","id":"10.36227\/techrxiv.20141084.v1","asserted-by":"object"},{"id-type":"doi","id":"10.36227\/techrxiv.20141084","asserted-by":"object"}]},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1]]}}}