{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T22:44:26Z","timestamp":1761173066036,"version":"build-2065373602"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T00:00:00Z","timestamp":1757980800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T00:00:00Z","timestamp":1757980800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,16]]},"DOI":"10.1109\/icdl63968.2025.11204397","type":"proceedings-article","created":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T17:07:41Z","timestamp":1761066461000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Cyclic Exploration and Exploitation in Surprise Minimizing Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Kinari","family":"Kubota","sequence":"first","affiliation":[{"name":"National Institute of Informatics,Tokyo,Japan,101-8430"}]},{"given":"Taisuke","family":"Kobayashi","sequence":"additional","affiliation":[{"name":"National Institute of Informatics,Tokyo,Japan,101-8430"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","article-title":"Reinforcement learning for robot soccer","volume":"27","author":"Riedmiller","year":"2009","journal-title":"Autonomous Robots"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.1038\/s41562-024-01991-9","article-title":"Building machines that learn and think with people","author":"Collins","year":"2024"},{"article-title":"A study on overfitting in deep reinforcement learning","year":"2018","author":"Zhang","key":"ref4"},{"key":"ref5","first-page":"716","article-title":"On the theory of dynamic programming","volume-title":"Proceedings of the National Academy of Sciences of the United States of America","volume":"38 8","author":"Bellman"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1109\/CVPRW.2017.70","article-title":"Curiosity-driven exploration by self-supervised prediction","author":"Pathak","year":"2017"},{"article-title":"Exploration by random network distillation","year":"2018","author":"Burda","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3061308"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1038\/nrn2787","article-title":"The free-energy principle: a unified brain theory?","volume":"11","author":"Friston","year":"2010","journal-title":"Nature Reviews Neuroscience"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2021.3049907"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aax8177"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2023.2225232"},{"article-title":"Smirl: Surprise minimizing reinforcement learning in unstable environments","year":"2021","author":"Berseth","key":"ref13"},{"article-title":"Auto-encoding variational bayes","year":"2022","author":"Kingma","key":"ref14"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref15"},{"article-title":"Reinforcement learning generalization with surprise minimization","year":"2020","author":"Chen","key":"ref16"},{"article-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2021.1959397"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"article-title":"Adam: A method for stochastic optimization","volume-title":"arXiv preprint arXiv:1412.6980","author":"Kingma","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21236\/ada440280"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.3389\/neuro.12.006.2007","article-title":"What is intrinsic motivation? a typology of computational approaches","volume":"1","author":"Oudeyer","year":"2007","journal-title":"Frontiers in Neurorobotics"}],"event":{"name":"2025 IEEE International Conference on Development and Learning (ICDL)","start":{"date-parts":[[2025,9,16]]},"location":"Prague, Czech Republic","end":{"date-parts":[[2025,9,19]]}},"container-title":["2025 IEEE International Conference on Development and Learning (ICDL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11204361\/11204347\/11204397.pdf?arnumber=11204397","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T05:44:52Z","timestamp":1761111892000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11204397\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,16]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/icdl63968.2025.11204397","relation":{},"subject":[],"published":{"date-parts":[[2025,9,16]]}}}