{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T06:43:34Z","timestamp":1730270614389,"version":"3.28.0"},"reference-count":36,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,10]]},"DOI":"10.1109\/iros.2018.8593588","type":"proceedings-article","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T02:33:30Z","timestamp":1548297210000},"page":"1540-1546","source":"Crossref","is-referenced-by-count":3,"title":["Model-Based Action Exploration for Learning Dynamic Motion Skills"],"prefix":"10.1109","author":[{"given":"Glen","family":"Berseth","sequence":"first","affiliation":[]},{"given":"Alex","family":"Kyriazis","sequence":"additional","affiliation":[]},{"given":"Ivan","family":"Zinin","sequence":"additional","affiliation":[]},{"given":"William","family":"Choi","sequence":"additional","affiliation":[]},{"given":"Michiel","family":"van de Panne","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","volume":"abs 1502 3167","author":"ioffe","year":"2015","journal-title":"Batch Normalization Accelerating Deep Network Training by Reducing Internal Covariate Shift"},{"journal-title":"Layer normalization","year":"2016","author":"lei ba","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3099564.3099567"},{"journal-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref36"},{"key":"ref35","volume":"abs 1502 5477","author":"schulman","year":"2015","journal-title":"Trust region policy optimization"},{"journal-title":"The Predictron End-To-End Learning and Planning","year":"2016","author":"silver","key":"ref34"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1007\/978-3-642-27645-3_7","article-title":"Reinforcement learning in continuous state and action spaces","author":"van hasselt","year":"2012","journal-title":"Reinforcement Learning"},{"key":"ref12","volume":"abs 1610 5182","author":"heess","year":"2016","journal-title":"Learning and transfer of modulated locomotor controllers"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"41:1","DOI":"10.1145\/3072959.3073602","article-title":"Deeploco: Dynamic locomotion skills using hierarchical deep reinforcement learning","volume":"36","author":"peng","year":"2017","journal-title":"ACM Trans Graph"},{"key":"ref14","volume":"abs 1605 9674","author":"houthooft","year":"2016","journal-title":"Curiosity-driven exploration in deep reinforcement learning via Bayesian neural networks"},{"key":"ref15","first-page":"2829","article-title":"Continuous deep q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"Proc ICML"},{"key":"ref16","volume":"abs 1703 4070","author":"mishra","year":"2017","journal-title":"Prediction and control with temporal segment models"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/122344.122377"},{"key":"ref18","first-page":"2944","article-title":"Learning continuous control policies by stochastic value gradients","volume":"28","author":"heess","year":"2015","journal-title":"Advances in neural information processing systems"},{"journal-title":"Neural Network Dynamics for Model-Based Deep Reinforcement Learning with Model-Free Fine-Tuning","year":"2017","author":"nagabandi","key":"ref19"},{"key":"ref28","volume":"abs 1602 2867","author":"tamar","year":"2016","journal-title":"Value iteration networks"},{"key":"ref4","volume":"abs 1509 2971","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref27","volume":"abs 1511 4143","author":"hausknecht","year":"2015","journal-title":"Deep Reinforcement Learning in Parameterized Action Space"},{"journal-title":"Parameter space noise for exploration","year":"2017","author":"plappert","key":"ref3"},{"journal-title":"Emergence of locomotion behaviours in rich environments","year":"2017","author":"heess","key":"ref6"},{"key":"ref29","volume":"abs 1612 429","author":"finn","year":"2016","journal-title":"Generalizing skills with semi-supervised reinforcement learning"},{"key":"ref5","volume":"1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref8","volume":"abs 1506 2438","author":"schulman","year":"2015","journal-title":"High-dimensional continuous control using generalized advantage estimation"},{"key":"ref7","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"journal-title":"Noisy Networks for Exploration","year":"2017","author":"fortunato","key":"ref2"},{"key":"ref9","volume":"abs 1602 1783","author":"mnih","year":"2016","journal-title":"Asynchronous methods for deep reinforcement learning"},{"journal-title":"Deep Exploration via Randomized Value Functions","year":"2017","author":"osband","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6942746"},{"key":"ref22","first-page":"2672","article-title":"Generative adversarial nets","volume":"27","author":"goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"journal-title":"MBMF Model-Based Priors for Model-Free Reinforcement Learning","year":"2017","author":"bansal","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"ref23","volume":"abs 1611 7004","author":"isola","year":"2016","journal-title":"Image-to-image translation with conditional adversarial networks"},{"key":"ref26","volume":"abs 1509 2971","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref25","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"Proc ICML"}],"event":{"name":"2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2018,10,1]]},"location":"Madrid","end":{"date-parts":[[2018,10,5]]}},"container-title":["2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8574473\/8593358\/08593588.pdf?arnumber=8593588","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T05:48:04Z","timestamp":1598248084000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8593588\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/iros.2018.8593588","relation":{},"subject":[],"published":{"date-parts":[[2018,10]]}}}