{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T15:30:36Z","timestamp":1759937436514,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,10,24]]},"DOI":"10.1109\/iros45743.2020.9341021","type":"proceedings-article","created":{"date-parts":[[2021,3,15]],"date-time":"2021-03-15T14:49:56Z","timestamp":1615819796000},"page":"5453-5459","source":"Crossref","is-referenced-by-count":15,"title":["An Online Training Method for Augmenting MPC with Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Guillaume","family":"Bellegarda","sequence":"first","affiliation":[{"name":"University of California at Santa Barbara (UCSB),Robotics Laboratory,Department of Electrical and Computer Engineering"}]},{"given":"Katie","family":"Byl","sequence":"additional","affiliation":[{"name":"University of California at Santa Barbara (UCSB),Robotics Laboratory,Department of Electrical and Computer Engineering"}]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-004-0559-y"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s12532-018-0139-4"},{"key":"ref10","first-page":"207","article-title":"Variational policy search via trajectory optimization","author":"levine","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref11","first-page":"1","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"International Conference on Machine Learning"},{"key":"ref12","article-title":"Plan online, learn offline: Efficient learning and exploration via model-based control","author":"lowrey","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6614995"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989202"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989043"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461203"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1002\/rob.21559"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-36279-8_32"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029417"},{"key":"ref28","first-page":"834","article-title":"Improving stochastic policy gradients in continuous control with deep reinforcement learning using the beta distribution","author":"chou","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"article-title":"Openai baselines","year":"2017","author":"dhariwal","key":"ref27"},{"key":"ref3","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"CoRR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"ref29","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref5","article-title":"Learning to walk via deep reinforcement learning","author":"haarnoja","year":"2018","journal-title":"CoRR"},{"key":"ref8","article-title":"No-regret reductions for imitation learning and structured prediction","author":"ross","year":"2010","journal-title":"CoRR"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau5872"},{"key":"ref2","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"CoRR"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2014.X.052"},{"key":"ref1","article-title":"Emergence of locomotion behaviours in rich environments","author":"heess","year":"2017","journal-title":"CoRR"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197541"},{"article-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning","year":"2016","author":"coumans","key":"ref22"},{"journal-title":"Reinforcement Learning An Introduction Adaptive Computations and Machine Learning","year":"1998","author":"sutton","key":"ref21"},{"key":"ref24","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"CoRR"},{"key":"ref23","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"CoRR"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref26"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2000.844054"}],"event":{"name":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2020,10,24]]},"location":"Las Vegas, NV, USA","end":{"date-parts":[[2021,1,24]]}},"container-title":["2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9340668\/9340635\/09341021.pdf?arnumber=9341021","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T21:57:23Z","timestamp":1656453443000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9341021\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,24]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/iros45743.2020.9341021","relation":{},"subject":[],"published":{"date-parts":[[2020,10,24]]}}}