{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T14:59:41Z","timestamp":1779375581345,"version":"3.53.1"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62003381"],"award-info":[{"award-number":["62003381"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1813202"],"award-info":[{"award-number":["U1813202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFB1313600"],"award-info":[{"award-number":["2020YFB1313600"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100011447","name":"Foundation of the Science and Technology Department of Henan Province","doi-asserted-by":"publisher","award":["212102210525"],"award-info":[{"award-number":["212102210525"]}],"id":[{"id":"10.13039\/501100011447","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100011447","name":"Foundation of the Science and Technology Department of Henan Province","doi-asserted-by":"publisher","award":["222102210077"],"award-info":[{"award-number":["222102210077"]}],"id":[{"id":"10.13039\/501100011447","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2023.3347803","type":"journal-article","created":{"date-parts":[[2023,12,28]],"date-time":"2023-12-28T19:45:45Z","timestamp":1703792745000},"page":"2224-2235","source":"Crossref","is-referenced-by-count":4,"title":["Boosting Policy Learning in Reinforcement Learning via Adaptive Intrinsic Reward Regulation"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3758-237X","authenticated-orcid":false,"given":"Qian","family":"Zhao","sequence":"first","affiliation":[{"name":"School of Physics and Telecommunication Engineering, Zhoukou Normal University, Zhoukou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2149-5589","authenticated-orcid":false,"given":"Jinhui","family":"Han","sequence":"additional","affiliation":[{"name":"School of Physics and Telecommunication Engineering, Zhoukou Normal University, Zhoukou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4549-1221","authenticated-orcid":false,"given":"Mao","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Physics and Telecommunication Engineering, Zhoukou Normal University, Zhoukou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref2","first-page":"9663","article-title":"MADE: Exploration via maximizing deviation from explored regions","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zhang"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.890271"},{"key":"ref4","first-page":"2850","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn. (ICML)","author":"Mnih"},{"issue":"98","key":"ref5","first-page":"1","article-title":"On the theory of policy gradient methods: Optimality, approximation, and distribution shift","volume":"22","author":"Agarwal","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref7","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Bellemare"},{"key":"ref8","first-page":"1","article-title":"Exploration by random network distillation","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Burda"},{"key":"ref9","first-page":"1","article-title":"Never give up: Learning directed exploration strategies","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Badia"},{"key":"ref10","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pathak"},{"key":"ref11","first-page":"1","article-title":"Revisiting intrinsic reward for exploration in procedurally generated environments","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref12","first-page":"1","article-title":"Adversarially guided actor-critic","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Flet-Berliac"},{"key":"ref13","first-page":"1","article-title":"RIDE: Rewarding impact-driven exploration for procedurally-generated environments","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Raileanu"},{"key":"ref14","first-page":"12040","article-title":"A provably efficient model-free posterior sampling method for episodic reinforcement learning","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Dann"},{"key":"ref15","first-page":"1","article-title":"Rank the episodes: A simple approach for exploration in procedurally-generated environments","volume-title":"Proc. 9th Int. Conf. Learn. Represent. (ICLR)","author":"Zha"},{"key":"ref16","first-page":"1","article-title":"How can we define intrinsic motivation?","volume-title":"Proc. 8th Int. Conf. Epigenetic Robot., Modelling Cogn. Develop. Robotic Syst.","author":"Oudeyer"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"ref18","first-page":"2721","article-title":"Count-based exploration with neural density models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ostrovski"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/11871842_29"},{"key":"ref20","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"Stadie","year":"2015","journal-title":"arXiv:1507.00814"},{"key":"ref21","first-page":"1","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Ho"},{"key":"ref22","first-page":"1","article-title":"Learning to understand goal specifications by modelling reward","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Bahdanau"},{"key":"ref23","first-page":"1515","article-title":"Automatic goal generation for reinforcement learning agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Florensa"},{"key":"ref24","first-page":"1","article-title":"Learning with amigo: Adversarially motivated intrinsic goals","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Campero"},{"key":"ref25","article-title":"Go-explore: A new approach for hard-exploration problems","author":"Ecoffet","year":"2019","journal-title":"arXiv:1901.10995"},{"issue":"124","key":"ref26","first-page":"1","article-title":"Deep exploration via randomized value functions","volume":"20","author":"Osband","year":"2019","journal-title":"J. Mach. Learn. Res."},{"key":"ref27","first-page":"1","article-title":"Parameter space noise for exploration","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Plappert"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1561\/2200000049"},{"key":"ref29","first-page":"397","article-title":"Using confidence bounds for exploitation-exploration trade-offs","volume":"3","author":"Auer","year":"2002","journal-title":"J. Mach. Learn. Res."},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v27i1.8678"},{"key":"ref31","first-page":"1","article-title":"When should agents explore?","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Pislar"},{"key":"ref32","first-page":"1117","article-title":"VIME: Variational information maximizing exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Houthooft"},{"key":"ref33","first-page":"1563","article-title":"Near-optimal regret bounds for reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"21","author":"Auer"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1214\/13-AOS1119"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.1991.170605"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12087"},{"key":"ref37","first-page":"37631","article-title":"Exploration via elliptical episodic bonuses","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Henaff"},{"key":"ref38","volume-title":"Minimalistic Gridworld Environment for OpenAI Gym","author":"Chevalier-Boisvert","year":"2018"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860433"},{"key":"ref40","volume-title":"Miniworld: Minimalistic 3D Environment for RL & Robotics Research","author":"Chevalier-Boisvert","year":"2018"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/10380310\/10375485.pdf?arnumber=10375485","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T03:59:36Z","timestamp":1706068776000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10375485\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/access.2023.3347803","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}