{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T15:06:12Z","timestamp":1761663972574,"version":"3.28.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,7]]},"DOI":"10.1109\/ijcnn.2019.8852254","type":"proceedings-article","created":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T03:44:32Z","timestamp":1569901472000},"page":"1-8","source":"Crossref","is-referenced-by-count":9,"title":["Curious Meta-Controller: Adaptive Alternation between Model-Based and Model-Free Control in Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Muhammad Burhan","family":"Hafez","sequence":"first","affiliation":[]},{"given":"Cornelius","family":"Weber","sequence":"additional","affiliation":[]},{"given":"Matthias","family":"Kerzel","sequence":"additional","affiliation":[]},{"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2017.8172289"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2013.6696520"},{"key":"ref30","article-title":"Adam: A method for stochastic 
optimization","author":"kingma","year":"2014","journal-title":"Proceedings of the International Conference on Learning Representations (ICLR)"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2013.09.001"},{"key":"ref11","first-page":"509","article-title":"Slowness-based neural visuomotor control with an Intrinsically motivated Continuous Actor-Critic","author":"hafez","year":"2018","journal-title":"Proceedings of the 26th European Symposium on Artificial Neural Networks Computational Intelligence and Machine Learning (ESANN)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1515\/pjbr-2019-0005"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-015-1861-8"},{"key":"ref14","first-page":"1109","article-title":"VIME: variational information maximizing exploration","author":"houthooft","year":"2016","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref15","first-page":"2597","article-title":"Self-correcting models for model-based reinforcement learning","author":"talvitie","year":"2017","journal-title":"Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref18","first-page":"5690","article-title":"Imagination-augmented agents for deep reinforcement learning","author":"racani\u00e8re","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref19","first-page":"195","article-title":"Uncertainty-driven imagination for continuous deep reinforcement learning","author":"kalweit","year":"2017","journal-title":"Proceedings of the 1st Annual Conference on Robot Learning volume 78 of Proceedings of Machine Learning Research"},{"key":"ref28","article-title":"Continuous control with deep reinforcement 
learning","author":"lillicrap","year":"2016","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref4","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1162\/jocn_a_01263"},{"key":"ref3","article-title":"Prioritized experience replay","author":"schaul","year":"2016","journal-title":"Proceedings of the International Conference on Learning Representations (ICLR)"},{"key":"ref6","first-page":"2750","article-title":"# Exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_7"},{"key":"ref5","first-page":"2721","article-title":"Count-based exploration with neural density models","author":"ostrovski","year":"2017","journal-title":"Proceedings of the International Conference on Machine Learning (ICML)"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","year":"2015","author":"stadie","key":"ref7"},{"key":"ref2","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2010.2056368"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref20","first-page":"2829","article-title":"Continuous deep Q-learning with model-based 
acceleration","author":"gu","year":"2016","journal-title":"Proceedings of the International Conference on Machine Learning (ICML)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2018.8489122"},{"key":"ref21","first-page":"4732","article-title":"Universal planning networks","author":"srinivas","year":"2018","journal-title":"Proceedings of the International Conference on Machine Learning (ICML)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1518488112"},{"key":"ref23","article-title":"Temporal difference models: Model-free deep RL for model-based control","author":"pong","year":"2018","journal-title":"Proceedings of the International Conference on Learning Representations (ICLR)"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1609094113"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1506367112"}],"event":{"name":"2019 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2019,7,14]]},"location":"Budapest, Hungary","end":{"date-parts":[[2019,7,19]]}},"container-title":["2019 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8840768\/8851681\/08852254.pdf?arnumber=8852254","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:48:46Z","timestamp":1658094526000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8852254\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2019.8852254","relation":{},"subject":[],"published":{"date-parts":[[2019,7]]}}}