{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T15:46:07Z","timestamp":1772207167783,"version":"3.50.1"},"reference-count":41,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,5]]},"DOI":"10.1109\/icra.2016.7487175","type":"proceedings-article","created":{"date-parts":[[2016,6,9]],"date-time":"2016-06-09T17:33:24Z","timestamp":1465493604000},"page":"528-535","source":"Crossref","is-referenced-by-count":271,"title":["Learning deep control policies for autonomous aerial vehicles with MPC-guided policy search"],"prefix":"10.1109","author":[{"given":"Tianhao","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Gregory","family":"Kahn","sequence":"additional","affiliation":[]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[]},{"given":"Pieter","family":"Abbeel","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"key":"ref38","article-title":"Nonlinear model predictive tracking control for rotorcraft-based unmanned aerial vehicles","author":"shim","year":"2002","journal-title":"American Control Conference (ACC)"},{"key":"ref33","article-title":"ALVINN: an autonomous land vehicle in a neural network","author":"pomerleau","year":"1989","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2009.5152385"},{"key":"ref30","article-title":"Rectified linear units improve restricted boltzmann machines","author":"nair","year":"2010","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630809"},{"key":"ref36","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume":"15","author":"ross","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref35","article-title":"Robust constrained model predictive control","author":"richards","year":"2004"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.10.018"},{"key":"ref10","article-title":"Fast biped walking with a reflexive controller and realtime policy searching","author":"geng","year":"2006","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389841"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980095"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(92)90053-I"},{"key":"ref13","author":"jacobson","year":"1970","journal-title":"Differential Dynamic Programming"},{"key":"ref14","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2010.VI.005"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2004.1307456"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2015.XI.012"},{"key":"ref19","article-title":"Learning neural network policies with guided policy search under unknown dynamics","author":"levine","year":"2014","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385647"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2012.6225035"},{"key":"ref27","article-title":"Playing Atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"NIPS Deep Learning Workshop"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385823"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2011.VII.008"},{"key":"ref29","article-title":"A model predictive controller for quadrocopter state interception","author":"mueller","year":"2013","journal-title":"European Control Conference (ECC)"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/0278364907084980"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7138978"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1049\/iet-cta.2011.0348"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385934"},{"key":"ref1","article-title":"An application of reinforcement learning to aerobatic helicopter flight","author":"abbeel","year":"2006","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref20","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7138994"},{"key":"ref21","article-title":"Learning complex neural network policies with trajectory optimization","year":"2014","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref24","first-page":"222","article-title":"Iterative linear quadratic regulator design for nonlinear biological movement systems","author":"li","year":"2004","journal-title":"ICINCO"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref23","article-title":"End-to-end training of deep visuomotor policies","author":"levine","year":"2015","journal-title":"arXiv preprint arXiv 1504 00702"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2004.08.019"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509980"}],"event":{"name":"2016 IEEE International Conference on Robotics and Automation (ICRA)","location":"Stockholm, Sweden","start":{"date-parts":[[2016,5,16]]},"end":{"date-parts":[[2016,5,21]]}},"container-title":["2016 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7478842\/7487087\/07487175.pdf?arnumber=7487175","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T12:03:39Z","timestamp":1498305819000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7487175\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,5]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/icra.2016.7487175","relation":{},"subject":[],"published":{"date-parts":[[2016,5]]}}}