{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:40:51Z","timestamp":1763106051001,"version":"3.32.0"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,14]]},"DOI":"10.1109\/iros58592.2024.10801857","type":"proceedings-article","created":{"date-parts":[[2024,12,25]],"date-time":"2024-12-25T19:17:39Z","timestamp":1735154259000},"page":"587-594","source":"Crossref","is-referenced-by-count":2,"title":["Image-Based Deep Reinforcement Learning with Intrinsically Motivated Stimuli: On the Execution of Complex Robotic Tasks"],"prefix":"10.1109","author":[{"given":"David","family":"Valencia","sequence":"first","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]},{"given":"Henry","family":"Williams","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]},{"given":"Yuning","family":"Xing","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]},{"given":"Trevor","family":"Gee","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]},{"given":"Minas","family":"Liarokapis","sequence":"additional","affiliation":[{"name":"The University of Auckland,New Dexterity Research Group,New Zealand"}]},{"given":"Bruce A.","family":"MacDonald","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-32375-1_2"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1037\/0003-066x.55.1.68"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/e25020327"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1515\/pjbr-2017-0004"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/13780.003.0019"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-32375-1_1"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00907"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.nlm.2019.03.005"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1097\/PSY.0b013e318052e27d"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.neubiorev.2015.05.002"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1037\/0882-7974.11.3.449"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.conb.2019.08.004"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.bbr.2015.01.015"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuropsychologia.2009.01.015"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.neubiorev.2009.08.006"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1523\/JNEUROSCI.5331-09.2010"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"ref18","first-page":"741","article-title":"Stochastic latent actor-critic: Deep reinforcement learning with a latent variable model","volume":"33","author":"Lee","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref19","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","volume-title":"International conference on machine learning","author":"Hafner"},{"article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","year":"2015","author":"Stadie","key":"ref20"},{"key":"ref21","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1009070"},{"article-title":"How can we define intrinsic motivation?","volume-title":"the 8th International Conference on Epigenetic Robotics: Modeling Cognitive Development in Robotic Systems","author":"Oudeyer","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.3389\/neuro.12.006.2007"},{"key":"ref25","article-title":"novelty, n., sense 1.a"},{"article-title":"Exploration via empowerment gain: Combining novelty, surprise and learning progress","volume-title":"ICML 2021 Workshop on Unsupervised Reinforcement Learning","author":"Becker-Ehmck","key":"ref26"},{"key":"ref27","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"International conference on machine learning","author":"Schulman"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553441"},{"key":"ref29","article-title":"Action-conditional video prediction using deep networks in atari games","volume":"28","author":"Oh","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref30","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","volume":"30","author":"Tang","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref31","article-title":"Unifying count-based exploration and intrinsic motivation","volume":"29","author":"Bellemare","year":"2016","journal-title":"Advances in neural information processing systems"},{"issue":"Oct","key":"ref32","first-page":"213","article-title":"R-max-a general polynomial time algorithm for near-optimal reinforcement learning","volume":"3","author":"Brafman","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref34","first-page":"2721","article-title":"Count-based exploration with neural density models","volume-title":"International conference on machine learning","author":"Ostrovski"},{"key":"ref35","article-title":"Ex2: Exploration with exemplar models for deep reinforcement learning","volume":"30","author":"Fu","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1515\/pjbr-2019-0005"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-020-01849-3"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1162\/EVCO_a_00025"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05845-8"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811663"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.0803390105"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1080\/14640747308400340"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.4236\/jcc.2019.73002"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160983"},{"key":"ref46","article-title":"When to trust your model: Model-based policy optimization","volume":"32","author":"Janner","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref47","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"Chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref48","first-page":"2","article-title":"Learning visual feature spaces for robotic manipulation with deep spatial autoencoders","volume":"25","author":"Finn","year":"2015"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"article-title":"Deepmind control suite","year":"2018","author":"Tassa","key":"ref50"},{"article-title":"Soft actor-critic algorithms and applications","year":"2018","author":"Haarnoja","key":"ref51"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794102"},{"key":"ref53","first-page":"1300","article-title":"Robel: Robotics benchmarks for learning with low-cost robots","volume-title":"Conference on robot learning","author":"Ahn"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"}],"event":{"name":"2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2024,10,14]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2024,10,18]]}},"container-title":["2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10801246\/10801290\/10801857.pdf?arnumber=10801857","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,26]],"date-time":"2024-12-26T06:58:31Z","timestamp":1735196311000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10801857\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,14]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/iros58592.2024.10801857","relation":{},"subject":[],"published":{"date-parts":[[2024,10,14]]}}}