{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T18:51:15Z","timestamp":1771959075713,"version":"3.50.1"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9561298","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:28:35Z","timestamp":1634689715000},"page":"6672-6678","source":"Crossref","is-referenced-by-count":24,"title":["Model Predictive Actor-Critic: Accelerating Robot Skill Acquisition with Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Andrew S.","family":"Morgan","sequence":"first","affiliation":[{"name":"Yale University,Department of Mechanical Engineering & Materials Science,USA"}]},{"given":"Daljeet","family":"Nandha","sequence":"additional","affiliation":[{"name":"Technische Universit&#x00E4;t,Intelligent Autonomous Systems,Darmstadt,Germany"}]},{"given":"Georgia","family":"Chalvatzaki","sequence":"additional","affiliation":[{"name":"Technische Universit&#x00E4;t,Intelligent Autonomous Systems,Darmstadt,Germany"}]},{"given":"Carlo","family":"D'Eramo","sequence":"additional","affiliation":[{"name":"Technische Universit&#x00E4;t,Intelligent Autonomous Systems,Darmstadt,Germany"}]},{"given":"Aaron M.","family":"Dollar","sequence":"additional","affiliation":[{"name":"Yale University,Department of Mechanical Engineering & Materials Science,USA"}]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[{"name":"Technische Universit&#x00E4;t,Intelligent Autonomous Systems,Darmstadt,Germany"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref38","article-title":"Plan online, learn offline: Efficient learning and exploration via model-based control","author":"lowrey","year":"2018"},{"key":"ref33","article-title":"Model-augmented actor-critic: Backpropagating through paths","author":"clavera","year":"2020"},{"key":"ref32","first-page":"1101","article-title":"Deep dynamics models for learning dexterous manipulation","author":"nagabandi","year":"2020","journal-title":"Conference on Robot Learning"},{"key":"ref31","article-title":"Model-ensemble trust-region policy optimization","author":"kurutach","year":"2018"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487277"},{"key":"ref36","first-page":"840","article-title":"Information theoretic model predictive q-learning","author":"bhardwaj","year":"2020","journal-title":"Learning for Dynamics and Control"},{"key":"ref35","article-title":"Deep value model predictive control","author":"farshidian","year":"2019"},{"key":"ref34","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989202"},{"key":"ref40","article-title":"Openai gym","author":"brockman","year":"2016"},{"key":"ref11","first-page":"4754","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref12","first-page":"8289","article-title":"Differentiable mpc for end-to-end planning and control","author":"amos","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref13","article-title":"Mpc-inspired neural network policies for sequential decision making","author":"pereira","year":"2018"},{"key":"ref14","first-page":"12 519","article-title":"When to trust your model: Model-based policy optimization","author":"janner","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"williams","year":"1992","journal-title":"Machine Learning"},{"key":"ref16","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref18","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015"},{"key":"ref19","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref28","first-page":"1","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"International Conference on Machine Learning"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487156"},{"key":"ref27","article-title":"Temporal difference models: Model-free deep rl for model-based control","author":"pong","year":"2018"},{"key":"ref3","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2929996"},{"key":"ref29","article-title":"Model-based reinforcement learning via meta-policy optimization","author":"clavera","year":"2018"},{"key":"ref5","article-title":"Algorithmic frame-work for model-based deep reinforcement learning with theoretical guarantees","author":"luo","year":"2018"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3007467"},{"key":"ref7","article-title":"Exploring model-based planning with policy networks","author":"wang","year":"2019"},{"key":"ref2","first-page":"663","article-title":"Using local trajectory optimizers to speed up global optimization in dynamic programming","author":"atkeson","year":"1994","journal-title":"Advances in neural information processing systems"},{"key":"ref9","first-page":"1071","article-title":"Learning neural network policies with guided policy search under unknown dynamics","author":"levine","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref1","first-page":"1465","article-title":"Receding horizon differential dynamic programming","author":"tassa","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref20","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759617"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460756"},{"key":"ref21","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref42","article-title":"Yale openhand project","year":"0"},{"key":"ref24","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2014.7090666"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2015.2448951"},{"key":"ref26","article-title":"Combining model-based and model-free updates for trajectory-centric reinforcement learning","author":"chebotar","year":"2017"},{"key":"ref43","first-page":"1","article-title":"Towards generalized manipulation learning through grasp mechanics-based features and self-supervision","author":"morgan","year":"2021","journal-title":"IEEE Transactions on Robotics"},{"key":"ref25","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","location":"Xi'an, China","start":{"date-parts":[[2021,5,30]]},"end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09561298.pdf?arnumber=9561298","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T23:20:54Z","timestamp":1659482454000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9561298\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9561298","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}