{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T09:57:47Z","timestamp":1730195867982,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/aim43001.2020.9158962","type":"proceedings-article","created":{"date-parts":[[2020,8,5]],"date-time":"2020-08-05T21:18:31Z","timestamp":1596662311000},"page":"2004-2010","source":"Crossref","is-referenced-by-count":0,"title":["Model-Based Robot Learning Control with Uncertainty Directed Exploration"],"prefix":"10.1109","author":[{"given":"Junjie","family":"Cao","sequence":"first","affiliation":[]},{"given":"Yong","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zaisheng","family":"Pan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref31","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics ser Proceedings of the fourteenth international conference on artificial intelligence and statistics"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1016\/B978-0-444-53859-8.00003-5","article-title":"The cross-entropy method for optimization","volume":"31","author":"botev","year":"2013","journal-title":"Handbook of Statistics"},{"journal-title":"Sim-to-real reinforcement learning for deformable object manipulation","year":"2018","author":"matas","key":"ref10"},{"key":"ref11","article-title":"Highdimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"arXiv preprint arXiv 1506 02349"},{"key":"ref12","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning ser ICML&#x2019;10"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau5872"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref15","article-title":"Uncertainty decomposition in bayesian neural networks with latent variables","author":"depeweg","year":"2017","journal-title":"arXiv preprint arXiv 1706 08495"},{"key":"ref16","first-page":"4754","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"Parameter space noise for exploration","author":"plappert","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref19","first-page":"2753","article-title":"Exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Advances in Neural Information Processing Systems Ser NIPS"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref4","first-page":"1050","article-title":"Dropout as a bayesian approximation: Representing model uncertainty in deep learning","author":"gal","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref27","article-title":"Improving pilco with bayesian neural network dynamics models","volume":"4","author":"gal","year":"2016","journal-title":"Data-Efficient Machine Learning workshop"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1992.4.3.448"},{"key":"ref6","first-page":"176","article-title":"Practical confidence and prediction intervals","author":"heskes","year":"1997","journal-title":"Advances in neural information processing systems"},{"key":"ref29","first-page":"497","article-title":"A survey of numerical methods for optimal control","volume":"135","author":"rao","year":"2009","journal-title":"Advances in the Astronautical Sciences"},{"key":"ref5","first-page":"4026","article-title":"Deep exploration via bootstrapped dqn","author":"osband","year":"2016","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/0278364910371999"},{"key":"ref7","first-page":"4072","article-title":"High-quality prediction intervals for deep learning: A distribution-free, ensembled approach","author":"pearce","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref2","first-page":"2377","article-title":"Generalization and exploration via randomized value functions","volume":"48","author":"osband","year":"2016","journal-title":"Proceedings of The 33rd International Conference on Machine Learning ser Proceedings of Machine Learning Research"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2010.936947"},{"key":"ref1","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv preprint arXiv 1312 5602"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.2307\/2332286"},{"journal-title":"Ucb exploration via q-ensembles","year":"2017","author":"chen","key":"ref21"},{"key":"ref24","first-page":"3003","article-title":"(more) efficient reinforcement learning via posterior sampling","author":"osband","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref23","first-page":"2249","article-title":"An empirical evaluation of thompson sampling","author":"chapelle","year":"2011","journal-title":"Proceedings of the 24th International Conference on Neural Information Processing Systems ser NIPS&#x2019;11"},{"key":"ref26","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning"},{"key":"ref25","first-page":"1109","article-title":"Vime: Variational information maximizing exploration","author":"houthooft","year":"2016","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2020 IEEE\/ASME International Conference on Advanced Intelligent Mechatronics (AIM)","start":{"date-parts":[[2020,7,6]]},"location":"Boston, MA, USA","end":{"date-parts":[[2020,7,9]]}},"container-title":["2020 IEEE\/ASME International Conference on Advanced Intelligent Mechatronics (AIM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9149748\/9158642\/09158962.pdf?arnumber=9158962","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:48:06Z","timestamp":1656344886000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9158962\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/aim43001.2020.9158962","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}