{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:11:40Z","timestamp":1759331500067,"version":"build-2065373602"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,17]],"date-time":"2025-08-17T00:00:00Z","timestamp":1755388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,17]],"date-time":"2025-08-17T00:00:00Z","timestamp":1755388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,17]]},"DOI":"10.1109\/case58245.2025.11163912","type":"proceedings-article","created":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T17:24:07Z","timestamp":1758648247000},"page":"2180-2187","source":"Crossref","is-referenced-by-count":0,"title":["Bounded Active Exploration for Model-Based Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Ting","family":"Qiao","sequence":"first","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]},{"given":"Henry","family":"Williams","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]},{"given":"Bruce A.","family":"MacDonald","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]}],"member":"263","reference":[{"article-title":"A survey on intrinsic motivation in reinforcement learning","year":"2019","author":"Aubret","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"issue":"Oct","key":"ref3","first-page":"213","article-title":"R-max-a general polynomial time algorithm for near-optimal reinforcement learning","volume":"3","author":"Brafman","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref4","article-title":"Sample-efficient reinforcement learning with stochastic ensemble value expansion","volume":"31","author":"Buckman","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/aba6f3"},{"key":"ref6","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"Chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref7","first-page":"1184","article-title":"Decomposition of uncertainty in bayesian deep learning for efficient and risk-sensitive learning","volume-title":"International conference on machine learning.","author":"Depeweg","year":"2018"},{"article-title":"Diversity is all you need: Learning skills without a reward function","year":"2018","author":"Eysenbach","key":"ref8"},{"key":"ref9","first-page":"1486","article-title":"Value-aware loss function for model-based reinforcement learning","volume-title":"Artificial Intelligence and Statistics.","author":"Farahmand","year":"2017"},{"article-title":"Model-based value estimation for efficient model-free reinforcement learning","year":"2018","author":"Feinberg","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-023-10562-9"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2024.3457538"},{"article-title":"Curiosity-driven exploration in deep reinforcement learning via bayesian neural networks","year":"2016","author":"Houthooft","key":"ref13"},{"key":"ref14","article-title":"When to trust your model: Model-based policy optimization","volume":"32","author":"Janner","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"article-title":"Model-ensemble trust-region policy optimization","year":"2018","author":"Kurutach","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.003"},{"article-title":"Objective mismatch in model-based reinforcement learning","year":"2020","author":"Lambert","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3103\/s0147688224700370"},{"article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","year":"2018","author":"Levine","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref22","article-title":"Can you trust your model\u2019s uncertainty? evaluating predictive uncertainty under dataset shift","volume":"32","author":"Ovadia","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref23","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"International conference on machine learning","author":"Pathak","year":"2019"},{"article-title":"Bounded exploration with world model uncertainty in soft actor-critic reinforcement learning algorithm","year":"2024","author":"Qiao","key":"ref24"},{"article-title":"Episodic curiosity through reachability","year":"2018","author":"Savinov","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"article-title":"Deepmind control suite","year":"2018","author":"Tassa","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801857"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561842"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802715"}],"event":{"name":"2025 IEEE 21st International Conference on Automation Science and Engineering (CASE)","start":{"date-parts":[[2025,8,17]]},"location":"Los Angeles, CA, USA","end":{"date-parts":[[2025,8,21]]}},"container-title":["2025 IEEE 21st International Conference on Automation Science and Engineering (CASE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11163731\/11163732\/11163912.pdf?arnumber=11163912","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T12:51:00Z","timestamp":1759236660000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11163912\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,17]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/case58245.2025.11163912","relation":{},"subject":[],"published":{"date-parts":[[2025,8,17]]}}}