{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:53:07Z","timestamp":1761897187554,"version":"3.41.2"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,11]]},"DOI":"10.1109\/iros40897.2019.8967896","type":"proceedings-article","created":{"date-parts":[[2020,1,30]],"date-time":"2020-01-30T23:53:51Z","timestamp":1580428431000},"page":"3704-3711","source":"Crossref","is-referenced-by-count":6,"title":["Improved Exploration through Latent Trajectory Optimization in Deep Deterministic Policy Gradient"],"prefix":"10.1109","author":[{"given":"Kevin Sebastian","family":"Luck","sequence":"first","affiliation":[{"name":"Arizona State University,Interactive Robotics Lab,Tempe,AZ,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mel","family":"Vecerik","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simon","family":"Stepputtis","sequence":"additional","affiliation":[{"name":"Arizona State University,Interactive Robotics Lab,Tempe,AZ,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Heni Ben","family":"Amor","sequence":"additional","affiliation":[{"name":"Arizona State University,Interactive Robotics Lab,Tempe,AZ,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jonathan","family":"Scholz","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Using deep reinforcement learning to learn high-level policies on the atrias biped","author":"li","year":"2018","journal-title":"arXiv preprint arXiv 1809 10811"},{"key":"ref11","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206356"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.075"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6942745"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-018-0533-0"},{"key":"ref18","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462891"},{"key":"ref4","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/279232.279236"},{"key":"ref3","article-title":"Exploiting symmetries in reinforcement learning of bimanual robotic tasks","author":"colome","year":"2019","journal-title":"IEEE l of Robotics and Automation"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460934"},{"key":"ref5","first-page":"2944","article-title":"Learning continuous control policies by stochastic value gradients","author":"heess","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref8","first-page":"849","article-title":"Policy search for motor primitives in robotics","author":"kober","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref7","article-title":"Learning to drive in a day","author":"kendall","year":"2018","journal-title":"arXiv preprint arXiv 1807 00412"},{"key":"ref2","first-page":"4759","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.01.087"},{"key":"ref1","article-title":"Closing the sim-to-real loop: Adapting simulation randomization with real world experience","author":"chebotar","year":"2018","journal-title":"arXiv preprint arXiv 1810 06008"},{"key":"ref20","article-title":"Universal planning networks","author":"srinivas","year":"2018","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref22","first-page":"2753","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref21","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"stadie","year":"2015","journal-title":"arXiv preprint arXiv 1507 00814"},{"key":"ref24","article-title":"Learning to fly like a bird","author":"tedrake","year":"2009","journal-title":"14th International Symposium on Robotics Research Lucerne Switzerland"},{"key":"ref23","article-title":"Deepmind control suite","author":"tassa","year":"2018","journal-title":"arXiv preprint arXiv 1801 00257"},{"key":"ref26","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"ve?er\u00edk","year":"2017","journal-title":"arXiv preprint arXiv 1707 08817"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.36.823"}],"event":{"name":"2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2019,11,3]]},"location":"Macau, China","end":{"date-parts":[[2019,11,8]]}},"container-title":["2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8957008\/8967518\/08967896.pdf?arnumber=8967896","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T18:24:28Z","timestamp":1753813468000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8967896\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/iros40897.2019.8967896","relation":{},"subject":[],"published":{"date-parts":[[2019,11]]}}}