{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T16:10:55Z","timestamp":1778947855934,"version":"3.51.4"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160581","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"1349-1356","source":"Crossref","is-referenced-by-count":12,"title":["Training Efficient Controllers via Analytic Policy Gradient"],"prefix":"10.1109","author":[{"given":"Nina","family":"Wiedemann","sequence":"first","affiliation":[{"name":"University of Zurich,Robotics and Perception Group,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Valentin","family":"W\u00fcest","sequence":"additional","affiliation":[{"name":"Ecole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Laboratory of Intelligent Systems"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Antonio","family":"Loquercio","sequence":"additional","affiliation":[{"name":"University of Zurich,Robotics and Perception Group,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthias","family":"M\u00fcller","sequence":"additional","affiliation":[{"name":"Intel,Embodied AI Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dario","family":"Floreano","sequence":"additional","affiliation":[{"name":"Ecole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne (EPFL),Laboratory of Intelligent Systems"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Davide","family":"Scaramuzza","sequence":"additional","affiliation":[{"name":"University of Zurich,Robotics and Perception Group,Switzerland"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Leveraging reward gradients for reinforcement learning in differentiable physics simulations","author":"gillen","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1109\/72.279181","article-title":"Learning long-term dependencies with gradient descent is difficult","volume":"5","author":"bengio","year":"1994","journal-title":"IEEE Transactions on Neural Networks"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MED.2014.6961532"},{"key":"ref14","article-title":"Learning to control pdes with differentiable physics","author":"holl","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref11","article-title":"Scalable differentiable physics for learning and control","author":"qiao","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref10","article-title":"A differentiable programming system to bridge machine learning and scientific computing","author":"innes","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref17","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1017\/9781139061759"},{"key":"ref19","article-title":"Neural ordinary differential equations","author":"chen","year":"2018","journal-title":"NeurIPS"},{"key":"ref18","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.2514\/6.2017-1512"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s12532-018-0139-4"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2776353"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3131690"},{"key":"ref42","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref41","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"paszke","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref44","article-title":"Mbrl-lib: A modular library for model-based reinforcement learning","author":"pineda","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref43","article-title":"Stable baselines3","author":"raffin","year":"2019"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2017.2786734"},{"key":"ref8","article-title":"Difftaichi: Differentiable programming for physical simulation","author":"hu","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref7","article-title":"Brax-a differentiable physics engine for large scale rigid body simulation","author":"freeman","year":"0","journal-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)"},{"key":"ref9","article-title":"End-to-end differentiable physics for learning and control","volume":"31","author":"de avila belbute-peres","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.3043361"},{"key":"ref3","article-title":"Neural-mpc: Deep learning model predictive control for quadrotors and agile robotic platforms","author":"salzmann","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636053"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-012-9724-3"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1515\/9781400840601"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368182"},{"key":"ref34","first-page":"430","volume":"1","author":"bakker","year":"2003","journal-title":"A robot that reinforcement-learns to identify and memorize important previous observations"},{"key":"ref37","article-title":"Gradients are not all you need","author":"metz","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1093\/jigpal\/jzp049"},{"key":"ref31","article-title":"Total stochastic gradient algorithms and applications in reinforcement learning","volume":"31","author":"parmas","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref30","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"0","journal-title":"Proceedings of the International Conference on Machine Learning"},{"key":"ref33","first-page":"617","article-title":"Model-based reinforcement learning via meta-policy optimization","author":"clavera","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"6","DOI":"10.3389\/fnbot.2019.00006","article-title":"A differentiable physics engine for deep learning in robotics","author":"degrave","year":"2019","journal-title":"Frontiers in Neurorobotics"},{"key":"ref2","first-page":"4958","author":"yu","year":"2017","journal-title":"Deep Reinforcement Learning Based Optimal Trajectory Tracking Control of Autonomous Underwater Vehicle"},{"key":"ref1","first-page":"1","author":"li","year":"2019","journal-title":"Fast and accurate trajectory tracking for unmanned aerial vehicles based on deep reinforcement learning"},{"key":"ref39","first-page":"1147","article-title":"Flightmare: A flexible quadrotor simulator","author":"song","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref24","first-page":"1773","article-title":"Soft multicopter control using neural dynamics identification","author":"deng","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759592"},{"key":"ref26","article-title":"Deluca-a differentiable control library: Environments, methods, and benchmarking","author":"gradu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7798978"},{"key":"ref20","article-title":"Learning continuous control policies by stochastic value gradients","volume":"28","author":"heess","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref22","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref21","article-title":"Dream to control: Learning behaviors by latent imagination","author":"hafner","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref28","article-title":"gradsim: Differentiable simulation for system identification and visuomotor control","author":"murthy","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref27","first-page":"7979","article-title":"Pontryagin differentiable programming: An end-to-end learning and control framework","volume":"33","author":"jin","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/280814.280816"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160581.pdf?arnumber=10160581","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:35:57Z","timestamp":1690220157000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160581\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160581","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}