{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T16:02:59Z","timestamp":1781366579567,"version":"3.54.1"},"reference-count":28,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1109\/icra.2017.7989379","type":"proceedings-article","created":{"date-parts":[[2017,7,25]],"date-time":"2017-07-25T17:44:28Z","timestamp":1501004668000},"page":"3342-3349","source":"Crossref","is-referenced-by-count":54,"title":["PLATO: Policy learning using adaptive trajectory optimization"],"prefix":"10.1109","author":[{"given":"Gregory","family":"Kahn","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tianhao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pieter","family":"Abbeel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref10","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"AISTATS"},{"key":"ref11","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"ICML"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487175"},{"key":"ref13","article-title":"Imitation learning by coaching","author":"he","year":"2012","journal-title":"NIPS"},{"key":"ref14","article-title":"Learning neural network policies with guided policy search under unknown 
dynamics","author":"levine","year":"2014","journal-title":"NIPS"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630809"},{"key":"ref16","article-title":"Deep learning for real-time Atari game play using offline Monte-Carlo tree search planning","author":"guo","year":"2014","journal-title":"NIPS"},{"key":"ref17","article-title":"Variational policy search via trajectory optimization","author":"levine","year":"2013","journal-title":"NIPS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2004.08.019"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2005.1469949"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4409115"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.312"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509980"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2015.2509024"},{"key":"ref6","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"ICLR"},{"key":"ref5","article-title":"End-to-end training of deep visuomotor policies","author":"levine","year":"2016","journal-title":"JMLR"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1561\/2300000021"},{"key":"ref7","article-title":"Learning continuous control policies by stochastic value gradients","author":"heess","year":"2015","journal-title":"NIPS"},{"key":"ref2","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"Workshop on Deep Learning NIPS"},{"key":"ref9","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"ICML"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5278-7"},{"key":"ref22","author":"pollard","year":"2000","journal-title":"Asymptopia an exposition of 
statistical asymptotic theory"},{"key":"ref21","article-title":"Divergences, surrogate loss functions and experimental design","author":"nguyen","year":"2005","journal-title":"NIPS"},{"key":"ref24","article-title":"Rectified linear units improve restricted boltzmann machines","author":"nair","year":"2010","journal-title":"ICML"},{"key":"ref23","article-title":"A model predictive controller for quadrotor state interception","author":"mueller","year":"2013","journal-title":"ECC"},{"key":"ref26","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"ICLR"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"}],"event":{"name":"2017 IEEE International Conference on Robotics and Automation (ICRA)","location":"Singapore, Singapore","start":{"date-parts":[[2017,5,29]]},"end":{"date-parts":[[2017,6,3]]}},"container-title":["2017 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7960754\/7988677\/07989379.pdf?arnumber=7989379","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,10,2]],"date-time":"2017-10-02T21:48:43Z","timestamp":1506980923000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7989379\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/icra.2017.7989379","relation":{},"subject":[],"published":{"date-parts":[[2017,5]]}}}