{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T09:13:24Z","timestamp":1773911604533,"version":"3.50.1"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1109\/tcyb.2025.3637764","type":"journal-article","created":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T18:34:30Z","timestamp":1765305270000},"page":"2271-2282","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Exploration in Actor-Critic Algorithms: An Approach to Incentivize Plausible Novel States"],"prefix":"10.1109","volume":"56","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1039-3744","authenticated-orcid":false,"given":"Chayan","family":"Banerjee","sequence":"first","affiliation":[{"name":"School of Electrical Engineering and Robotics, Queensland University of Technology, Brisbane, QLD, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2033-4249","authenticated-orcid":false,"given":"Zhiyong","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8566-0870","authenticated-orcid":false,"given":"Nasimul","family":"Noman","sequence":"additional","affiliation":[{"name":"School of Information and Physical Sciences, The University of Newcastle, Callaghan, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1008","article-title":"Actor-critic algorithms","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Konda"},{"key":"ref2","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih"},{"key":"ref3","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Silver"},{"key":"ref4","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref5","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref6","article-title":"Noisy networks for exploration","author":"Fortunato","year":"2017","journal-title":"arXiv:1706.10295"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2022.3140375"},{"key":"ref8","first-page":"1479","article-title":"Unifying count-based exploration and intrinsic motivation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Bellemare"},{"key":"ref9","article-title":"Curiosity-driven experience prioritization via density estimation","author":"Zhao","year":"2019","journal-title":"arXiv:1902.08039"},{"key":"ref10","article-title":"#Exploration: A study of count-based exploration for deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Tang"},{"key":"ref11","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"Stadie","year":"2015","journal-title":"arXiv:1507.00814"},{"key":"ref12","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pathak"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1842"},{"key":"ref14","article-title":"Keep various trajectories: Promoting exploration of ensemble policies in continuous control","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3592"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3150802"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2949596"},{"key":"ref18","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref19","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume-title":"Proc. 23rd AAAI Conf. Artif. Intell.","volume":"8","author":"Ziebart"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17297"},{"key":"ref21","article-title":"R\u00e9nyi state entropy for exploration acceleration in reinforcement learning","author":"Yuan","year":"2022","journal-title":"arXiv:2203.04297"},{"key":"ref22","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3174051"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ANZCC56036.2022.9966956"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3091680"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1214"},{"key":"ref28","article-title":"Self-adaptive success rate-based reward shaping for increased reinforcement learning efficiency","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Ma"},{"issue":"2","key":"ref29","first-page":"423","article-title":"Reinforcement learning: An introduction. by Richard\u2019s sutton","volume":"6","author":"Barto","year":"2021","journal-title":"SIAM Rev."},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2016.02.001"},{"key":"ref32","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"},{"key":"ref33","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3126658"},{"key":"ref35","first-page":"734","article-title":"What went wrong? Closing the sim-to-real gap via differentiable causal discovery","volume-title":"Proc. Conf. Robot Learn.","author":"Huang"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160384"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2023.104432"},{"key":"ref38","volume-title":"Pytorch Soft Actor-Critic","author":"Tandon","year":"2021"},{"key":"ref39","volume-title":"Addressing Function Approximation Errors in Actor-Critic Methods","author":"Scott","year":"2021"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177728190"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1155\/2020\/1803525"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8621923"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-021-01275-y"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2016.2626441"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6221036\/11442719\/11288727.pdf?arnumber=11288727","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T04:46:04Z","timestamp":1773895564000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11288727\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":44,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2025.3637764","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4]]}}}