{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T03:56:04Z","timestamp":1774583764523,"version":"3.50.1"},"reference-count":130,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"European Union's Horizon Europe Research and Innovation Programme","award":["101120732"],"award-info":[{"award-number":["101120732"]}]},{"DOI":"10.13039\/501100000781","name":"European Research Council","doi-asserted-by":"publisher","award":["864042"],"award-info":[{"award-number":["864042"]}],"id":[{"id":"10.13039\/501100000781","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Robot."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/tro.2025.3644945","type":"journal-article","created":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T18:50:23Z","timestamp":1765997423000},"page":"673-692","source":"Crossref","is-referenced-by-count":4,"title":["Actor\u2013Critic Model Predictive Control: Differentiable Optimization Meets Reinforcement Learning for Agile Flight"],"prefix":"10.1109","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7977-7802","authenticated-orcid":false,"given":"Angel","family":"Romero","sequence":"first","affiliation":[{"name":"Robotics and Perception Group, University of Zurich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0590-0043","authenticated-orcid":false,"given":"Elie","family":"Aljalbout","sequence":"additional","affiliation":[{"name":"Robotics and Perception Group, University of Zurich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6352-3744","authenticated-orcid":false,"given":"Yunlong","family":"Song","sequence":"additional","affiliation":[{"name":"Robotics and Perception Group, University of Zurich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3831-6778","authenticated-orcid":false,"given":"Davide","family":"Scaramuzza","sequence":"additional","affiliation":[{"name":"Robotics and Perception Group, University of Zurich, Zurich, Switzerland"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/4580"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nrn2787"},{"issue":"1","key":"ref3","first-page":"1","article-title":"A path towards autonomous machine intelligence version 0.9. 2 2022-06-27","volume":"62","author":"LeCun","year":"2022","journal-title":"Open Rev."},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/AERO53065.2022.9843428"},{"key":"ref5","volume-title":"Applied Optimal Control: Optimization, Estimation, and Control","author":"Arthur","year":"1975"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-41108-8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-32552-1_48"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2020.XVI.081","article-title":"Alphapilot: Autonomous drone racing","volume-title":"Proc. Robot.: Sci. Syst.","author":"Foehn","year":"2020"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abh1221"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2022.3173711"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3185772"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6942636"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abk2822"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adg1462"},{"key":"ref15","article-title":"From machine learning to robotics: Challenges and opportunities for embodied intelligence","author":"Roy","year":"2021"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-022-10039-8"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11127523"},{"key":"ref19","article-title":"Is conditional generative modeling all you need for decision making?","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Ajay","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-04357-7"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06419-4"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020211"},{"key":"ref26","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2017"},{"key":"ref27","article-title":"Reinforcement learning with sparse rewards using guidance from offline demonstration","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Rengarajan","year":"2022"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-090419-075625"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3049335"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610381"},{"key":"ref31","first-page":"8299","article-title":"Differentiable MPC for end-to-end planning and control","volume-title":"Proc. Adv. neural Inf. Process. Syst.","volume":"31","author":"Amos","year":"2018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(89)90002-2"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2009.2017934"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2011.05.006"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1002\/oca.2123"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2017.8246930"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2017.7995716"},{"key":"ref38","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2024.XX.022","article-title":"Model predictive control for aggressive driving over uneven terrain","volume-title":"Proc. Robot.: Sci. Syst.","author":"Han","year":"2024"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3264758"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3339543"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341785"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981780"},{"key":"ref43","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2024.XX.109","article-title":"MPCC++: Model predictive contouring control for time-optimal flight with safety constraints","volume-title":"Proc. Robotics: Sci. Syst.","author":"Krinner","year":"2024"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2021.109597"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2024.3431988"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3386053"},{"key":"ref47","first-page":"604","article-title":"One model to drift them all: Physics-informed conditional diffusion model for driving at the limits","volume-title":"Proc. 8th Annu. Conf. Robot Learn.","author":"Djeumou","year":"2025"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2010.2049527"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-75472-3_6"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593885"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2800124"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487274"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1177\/02783649221102473"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-015-9479-3"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2018.2865891"},{"key":"ref56","first-page":"750","article-title":"Storm: An integrated framework for fast joint-space model-predictive control for reactive manipulation","volume-title":"Proc. Conf. Robot Learn.","author":"Bhardwaj","year":"2022"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802756"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11127320"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-04301-9"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06004-9"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abc5986"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.23919\/ECC54610.2021.9655125"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161417"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8796289"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3097073"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3266985"},{"key":"ref68","first-page":"990","article-title":"Deep value model predictive control","volume-title":"Proc. Conf. Robot Learn.","author":"Hoeller","year":"2020"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/tcst.2025.3620521"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812369"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611492"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/IV55156.2024.10588747"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.23919\/ECC54610.2021.9655042"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adh5401"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.23919\/ECC57647.2023.10178143"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/OJCSYS.2022.3221063"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3192609"},{"key":"ref78","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2015.XI.012","article-title":"DeepMPC: Learning deep latent features for model predictive control","volume-title":"Proc. Robot.: Sci. Syst.","author":"Lenz","year":"2015"},{"key":"ref79","first-page":"2746","article-title":"Embed to control: A locally linear latent dynamics model for control from raw images","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Watter","year":"2015"},{"key":"ref80","article-title":"Visual foresight: Model-based deep reinforcement learning for vision-based robotic control","author":"Ebert","year":"2018"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794351"},{"key":"ref82","article-title":"Computation-aware learning for stable control with gaussian process","volume-title":"Proc. Robot.: Sci. Syst.","author":"Cao","year":"2020"},{"key":"ref83","first-page":"8387","article-title":"Temporal difference learning for model predictive control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hansen","year":"2022"},{"key":"ref84","article-title":"MPC-inspired neural network policies for sequential decision making","author":"Pereira","year":"2018"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejcon.2024.101048"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106211"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejcon.2023.100880"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.23919\/ECC57647.2023.10178119"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3346324"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3146284"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2025.3531818"},{"key":"ref92","article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner","year":"2019"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11128817"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref95","first-page":"4759","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chua","year":"2018"},{"key":"ref96","article-title":"TD-MPC2: Scalable, robust world models for continuous control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hansen","year":"2024"},{"key":"ref97","first-page":"1101","article-title":"Deep dynamics models for learning dexterous manipulation","volume-title":"Proc. Conf. Robot Learn.","author":"Nagabandi","year":"2020"},{"key":"ref98","first-page":"1622","article-title":"Learning off-policy with online planning","volume-title":"Proc. Conf. Robot Learn.","author":"Sikchi","year":"2022"},{"key":"ref99","first-page":"170","article-title":"Difftune$^+$: Hyperparameter-free auto-tuning using auto-differentiation","volume-title":"Proc. 5th Annu. Learn. Dyn. Control Conf.","author":"Cheng","year":"2023"},{"key":"ref100","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2023.XIX.064","article-title":"Iplanner: Imperative path planning","volume-title":"Proc. Robotics: Sci. Syst.","author":"Yang","year":"2023"},{"key":"ref101","first-page":"2170","article-title":"Diffstack: A differentiable and modular control stack for autonomous vehicles","volume-title":"Proc. Conf. Robot Learn.","author":"Karkus","year":"2023"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3331064"},{"key":"ref103","first-page":"3801","article-title":"Theseus: A library for differentiable nonlinear optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Pineda","year":"2022"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02109"},{"key":"ref105","article-title":"Infinite-horizon differentiable model predictive control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"East","year":"2019"},{"key":"ref106","first-page":"1708","article-title":"Learning model predictive controllers with real-time attention for real-world navigation","volume-title":"Proc. 6th Conf. Robot Learn.","author":"Xiao","year":"2022"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.55417\/fr.2022042"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.23919\/ECC54610.2021.9654841"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.3182\/20140824-6-ZA-1003.00203"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-36119-0_4"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636053"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3154013"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1016\/j.jprocont.2014.03.010"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2012.6425822"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2913768"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/ECC.2016.7810277"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3034868"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012902400713"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1080\/00207179.2016.1222553"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1007\/s12532-021-00208-8"},{"key":"ref121","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sutton","year":"1999"},{"key":"ref122","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref123","article-title":"Model-based value estimation for efficient model-free reinforcement learning","author":"Feinberg","year":"2018"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.042"},{"key":"ref125","first-page":"1147","article-title":"Flightmare: A flexible quadrotor simulator","volume-title":"Proc. 2020 Conf. Robot Learn.","volume":"155","author":"Song","year":"2021"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abl6259"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811564"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3398428"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3131690"},{"key":"ref130","article-title":"High-dimensional continuous control using generalized advantage estimation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Schulman","year":"2016"}],"container-title":["IEEE Transactions on Robotics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8860\/11297026\/11301631.pdf?arnumber=11301631","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T19:42:12Z","timestamp":1773862932000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11301631\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":130,"URL":"https:\/\/doi.org\/10.1109\/tro.2025.3644945","relation":{},"ISSN":["1552-3098","1941-0468"],"issn-type":[{"value":"1552-3098","type":"print"},{"value":"1941-0468","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}