{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T21:36:35Z","timestamp":1768253795383,"version":"3.49.0"},"reference-count":55,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10161574","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"1011-1018","source":"Crossref","is-referenced-by-count":7,"title":["Gradient-Based Trajectory Optimization With Learned Dynamics"],"prefix":"10.1109","author":[{"given":"Bhavya","family":"Sukhija","sequence":"first","affiliation":[{"name":"ETH,Department of Computer Science,Z&#x00FC;rich,Switzerland"}]},{"given":"Nathanael","family":"K\u00f6hler","sequence":"additional","affiliation":[{"name":"ETH,Department of Computer Science,Z&#x00FC;rich,Switzerland"}]},{"given":"Miguel","family":"Zamora","sequence":"additional","affiliation":[{"name":"ETH,Department of Computer Science,Z&#x00FC;rich,Switzerland"}]},{"given":"Simon","family":"Zimmermann","sequence":"additional","affiliation":[{"name":"ETH,Department of Computer Science,Z&#x00FC;rich,Switzerland"}]},{"given":"Sebastian","family":"Curi","sequence":"additional","affiliation":[{"name":"ETH,Department of Computer Science,Z&#x00FC;rich,Switzerland"}]},{"given":"Andreas","family":"Krause","sequence":"additional","affiliation":[{"name":"ETH,Department of Computer Science,Z&#x00FC;rich,Switzerland"}]},{"given":"Stelian","family":"Coros","sequence":"additional","affiliation":[{"name":"ETH,Department of Computer Science,Z&#x00FC;rich,Switzerland"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s10339-011-0404-1"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-0429-2"},{"key":"ref15","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/0893-6080(89)90020-8"},{"key":"ref53","article-title":"Model-ensemble trust-region policy optimization","author":"kurutach","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref52","article-title":"Epopt: Learning robust neural network policies using model ensembles","author":"rajeswaran","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref11","author":"ljung","year":"1999","journal-title":"System Identification"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1002\/gamm.202100009"},{"key":"ref10","first-page":"123","article-title":"System identification - a survey","volume":"7","author":"astrom","year":"1971","journal-title":"Auto-matica"},{"key":"ref54","first-page":"617","article-title":"Model-based reinforcement learning via meta-policy optimization","volume":"87","author":"clavera","year":"2018","journal-title":"Proceedings of The 2nd Conference on Robot Learning ser Proceedings of Machine Learning Research"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1002\/oca.2123"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509562"},{"key":"ref19","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proceedings of the 28th International Conference on machine learning (ICML-11) ser ICML'11 Omnipress"},{"key":"ref18","author":"moerland","year":"2021","journal-title":"Model-based reinforcement learning A survey"},{"key":"ref51","article-title":"When to trust your model: Model-based policy optimization","volume":"32","author":"janner","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref50","first-page":"2829","article-title":"Continuous deep q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"Int Conference on Machine Learning"},{"key":"ref46","author":"puterman","year":"2014","journal-title":"Markov Decision Processes Discrete Stochastic Dy-namic Programming"},{"key":"ref45","author":"hafner","year":"2020","journal-title":"Dream to control Learning behaviors by latent imagination"},{"key":"ref48","article-title":"Gaussian error linear units (gelus)","author":"hendrycks","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref47","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"chung","year":"2014","journal-title":"NeurIPS 2014 Workshop on Deep Learning"},{"key":"ref42","article-title":"Generating sequences with recurrent neural networks","author":"graves","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref41","first-page":"277","article-title":"Model-predictive control via cross-entropy and gradient-based optimization","volume":"120","author":"bharadhwaj","year":"2020","journal-title":"Proceedings of the 2nd Conference on Learning for Dynamics and Control ser Proceedings of Machine Learning Research"},{"key":"ref44","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"2019","journal-title":"Int Conference on Machine Learning"},{"key":"ref43","article-title":"Recurrent world models facilitate pol-icy evolution","volume":"31","author":"ha","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(89)90002-2"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.052"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201368"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3323003"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2015.2505910"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509920"},{"key":"ref6","author":"biagiotti","year":"2008","journal-title":"Trajectory Planning for Automatic Machines and Robots"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abc5986"},{"key":"ref40","author":"williams","year":"2018","journal-title":"Information-theoretic model predictive control Theory and applications to autonomous driving"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1016\/B978-0-444-53859-8.00003-5","article-title":"Chapter 3 - the cross-entropy method for optimization","author":"botev","year":"2013","journal-title":"Handbook of Statistics"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2018.07.326"},{"key":"ref37","article-title":"Regularizing trajectory optimization with denoising autoencoders","author":"boney","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref36","article-title":"Learning neural network policies with guided policy search under unknown dynamics","volume":"27","author":"levine","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/72.80202"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1016\/S1474-6670(17)47737-8","article-title":"Neural networks in system identification","volume":"27","author":"sjaqberg","year":"1994","journal-title":"IFAC Proceedings Volumes"},{"key":"ref33","article-title":"Nonlinear systems identification using deep dynamic neural networks","author":"ogunmolu","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2006.875186"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-73429-1_1"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/1729881419839596"},{"key":"ref39","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"International Conference on Learning Representations"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2005.1469949"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561835"},{"key":"ref26","first-page":"1101","article-title":"Deep dynamics models for learning dexterous manipulation","volume":"100","author":"nagabandi","year":"0","journal-title":"Proceedings of the Conference on Robot Learning ser Proceedings of Machine Learning Research"},{"key":"ref25","article-title":"Deep rein-forcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.218"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2926677"},{"key":"ref21","article-title":"Data-efficient reinforcement learning with probabilistic model predictive control","author":"kamthe","year":"2018","journal-title":"Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS)"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/3206.001.0001"},{"key":"ref27","first-page":"14 156","article-title":"Efficient model-based reinforcement learning through optimistic policy search and planning","author":"curi","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023744"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161574.pdf?arnumber=10161574","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:31:11Z","timestamp":1690219871000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10161574\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":55,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10161574","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}