{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T16:14:27Z","timestamp":1774023267095,"version":"3.50.1"},"reference-count":351,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T00:00:00Z","timestamp":1769126400000},"content-version":"vor","delay-in-days":22,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006360","name":"BMWK","doi-asserted-by":"publisher","award":["03EI4057A"],"award-info":[{"award-number":["03EI4057A"]}],"id":[{"id":"10.13039\/501100006360","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006360","name":"BMWK","doi-asserted-by":"publisher","award":["03EN3054B"],"award-info":[{"award-number":["03EN3054B"]}],"id":[{"id":"10.13039\/501100006360","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005416","name":"Research Council of Norway","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100005416","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010665","name":"EU Framework Programme for Research and Innovation Marie Sk\u0142odowska-Curie Actions","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100010665","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100021130","name":"Bundesministerium f\u00fcr Wirtschaft und Klimaschutz","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100021130","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["525018088"],"award-info":[{"award-number":["525018088"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["504452366"],"award-info":[{"award-number":["504452366"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["SPP2364"],"award-info":[{"award-number":["SPP2364"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Annual Reviews in Control"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1016\/j.arcontrol.2026.101045","type":"journal-article","created":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T15:41:01Z","timestamp":1770651661000},"page":"101045","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":4,"special_numbering":"C","title":["Synthesis of model predictive control and reinforcement learning: Survey and classification"],"prefix":"10.1016","volume":"61","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7635-2132","authenticated-orcid":false,"given":"Rudolf","family":"Reiter","sequence":"first","affiliation":[]},{"given":"Jasper","family":"Hoffmann","sequence":"additional","affiliation":[]},{"given":"Dirk","family":"Reinhardt","sequence":"additional","affiliation":[]},{"given":"Florian","family":"Messerer","sequence":"additional","affiliation":[]},{"given":"Katrin","family":"Baumg\u00e4rtner","sequence":"additional","affiliation":[]},{"given":"Shambhuraj","family":"Sawant","sequence":"additional","affiliation":[]},{"given":"Joschka","family":"B\u00f6decker","sequence":"additional","affiliation":[]},{"given":"Moritz","family":"Diehl","sequence":"additional","affiliation":[]},{"given":"Sebastien","family":"Gros","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.arcontrol.2026.101045_b1","article-title":"A Neural Network Architecture to Learn Explicit MPC Controllers from Data","year":"2020","journal-title":"Ifac Papersonline"},{"key":"10.1016\/j.arcontrol.2026.101045_b2","unstructured":"Abdolmaleki, A., Springenberg, J. T., Tassa, Y., Munos, R., Heess, N., & Riedmiller, M. (2018). Maximum a Posteriori Policy Optimisation. In International conference on learning representations."},{"key":"10.1016\/j.arcontrol.2026.101045_b3","series-title":"2024 IEEE\/RSJ international conference on intelligent robots and systems","first-page":"2824","article-title":"Driving from vision through differentiable optimal control","author":"Acerbo","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b4","series-title":"Proceedings of the 34th international conference on machine learning","first-page":"22","article-title":"Constrained Policy Optimization","author":"Achiam","year":"2017"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b5","doi-asserted-by":"crossref","first-page":"11841","DOI":"10.1016\/j.ifacol.2023.10.586","article-title":"Fast Reinforcement Learning Based MPC based on NLP Sensitivities","volume":"56","author":"Adhau","year":"2023","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b6","series-title":"Proceedings of the 5th annual learning for dynamics and control conference","first-page":"1493","article-title":"Model Predictive Control via On-Policy Imitation Learning","author":"Ahn","year":"2023"},{"issue":"9","key":"10.1016\/j.arcontrol.2026.101045_b7","doi-asserted-by":"crossref","first-page":"937","DOI":"10.1016\/j.jprocont.2006.06.001","article-title":"A neural network model predictive controller","volume":"16","author":"\u00c5kesson","year":"2006","journal-title":"Journal of Process Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b8","series-title":"Proceedings of the 6th annual learning for dynamics & control conference","first-page":"1452","article-title":"CACTO-SL: Using Sobolev learning to improve continuous actor-critic with trajectory optimization","author":"Alboni","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b9","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1016\/j.sysconle.2017.03.005","article-title":"On the inherent robustness of optimal and suboptimal nonlinear MPC","volume":"106","author":"Allan","year":"2017","journal-title":"Systems & Control Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b10","series-title":"Constrained Markov decision processes","author":"Altman","year":"1999"},{"key":"10.1016\/j.arcontrol.2026.101045_b11","article-title":"Differentiable MPC for End-to-end Planning and Control","volume":"Vol. 31","author":"Amos","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b12","series-title":"European control conference (ECC)","first-page":"1","article-title":"A Painless Deterministic Policy Gradient Method for Learning-based MPC","author":"Anand","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b13","series-title":"Data-Driven Predictive Control and MPC: Do we achieve optimality?","author":"Anand","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b14","series-title":"All AI Models are Wrong, but Some are Optimal","author":"Anand","year":"2025"},{"key":"10.1016\/j.arcontrol.2026.101045_b15","series-title":"Optimal control - linear quadratic methods","author":"Anderson","year":"1990"},{"issue":"20","key":"10.1016\/j.arcontrol.2026.101045_b16","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1016\/j.ifacol.2018.11.055","article-title":"Sensitivity Analysis for Nonlinear Programming in CasADi*","volume":"51","author":"Andersson","year":"2018","journal-title":"IFAC-PapersOnLine"},{"issue":"7","key":"10.1016\/j.arcontrol.2026.101045_b17","doi-asserted-by":"crossref","first-page":"1615","DOI":"10.1109\/TAC.2011.2179349","article-title":"On Average Performance and Stability of Economic Model Predictive Control","volume":"57","author":"Angeli","year":"2012","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b18","series-title":"The MOSEK optimization toolbox for MATLAB manual. Version 10.1.","author":"ApS","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b19","series-title":"Real-Time Certified MPC : Reliable Active-Set QP Solvers","author":"Arnstr\u00f6m","year":"2023"},{"issue":"5","key":"10.1016\/j.arcontrol.2026.101045_b20","doi-asserted-by":"crossref","first-page":"1216","DOI":"10.1016\/j.automatica.2013.02.003","article-title":"Provably safe and robust learning-based model predictive control","volume":"49","author":"Aswani","year":"2013","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b21","article-title":"Reinforcement Learning with Long Short-Term Memory","volume":"Vol. 14","author":"Bakker","year":"2001"},{"key":"10.1016\/j.arcontrol.2026.101045_b22","series-title":"OffCon3: What is state of the art anyway?","author":"Ball","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b23","series-title":"RL-augmented MPC Framework for Agile and Robust Bipedal Footstep Locomotion Planning and Control","author":"Bang","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b24","series-title":"Navigation world models","author":"Bar","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b25","doi-asserted-by":"crossref","unstructured":"Baumg\u00e4rtner, K., Messerer, F., & Diehl, M. (2023). A Unified Local Convergence Analysis of Differential Dynamic Programming, Direct Single Shooting, and Direct Multiple Shooting. In Proceedings of the European control conference (ECC).","DOI":"10.23919\/ECC57647.2023.10178367"},{"key":"10.1016\/j.arcontrol.2026.101045_b26","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1016\/j.ejcon.2020.03.001","article-title":"A Q-learning predictive control scheme with guaranteed stability","volume":"56","author":"Beckenbach","year":"2020","journal-title":"European Journal of Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b27","series-title":"IEEE 61st conference on decision and control (CDC)","first-page":"3711","article-title":"Approximate infinite-horizon predictive control","author":"Beckenbach","year":"2022"},{"issue":"3731","key":"10.1016\/j.arcontrol.2026.101045_b28","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1126\/science.153.3731.34","article-title":"Dynamic programming","volume":"153","author":"Bellman","year":"1966","journal-title":"American Association for the Advancement of Science"},{"key":"10.1016\/j.arcontrol.2026.101045_b29","unstructured":"Bemporad, A., Borrelli, F., & Morari, M. (1999). The explicit solution of constrained LP-Based receding horizon control. In Proceedings of the IEEE conference on decision and control (CDC). Sydney, Australia."},{"issue":"12","key":"10.1016\/j.arcontrol.2026.101045_b30","doi-asserted-by":"crossref","first-page":"1974","DOI":"10.1109\/TAC.2002.805688","article-title":"Model predictive control based on linear programming - the explicit solution","volume":"47","author":"Bemporad","year":"2002","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b31","first-page":"872","article-title":"The explicit solution of model predictive control via multiparametric quadratic programming","volume":"Vol. 2","author":"Bemporad","year":"2000"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b32","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1007\/s10107-003-0454-y","article-title":"Adjustable robust solutions of uncertain linear programs","volume":"99","author":"Ben-Tal","year":"2004","journal-title":"Mathematical Programming"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b33","doi-asserted-by":"crossref","first-page":"405","DOI":"10.1016\/j.ejor.2020.07.063","article-title":"Machine learning for combinatorial optimization: A methodological tour d\u2019horizon","volume":"290","author":"Bengio","year":"2021","journal-title":"European Journal of Operational Research"},{"issue":"4","key":"10.1016\/j.arcontrol.2026.101045_b34","doi-asserted-by":"crossref","first-page":"310","DOI":"10.3166\/ejc.11.310-334","article-title":"Dynamic Programming and Suboptimal Control: A Survey from {ADP} to {MPC}*","volume":"11","author":"Bertsekas","year":"2005","journal-title":"European Journal of Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b35","series-title":"Reinforcement learning and optimal control","author":"Bertsekas","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b36","series-title":"Lessons from alphazero for optimal, model predictive, and adaptive control","author":"Bertsekas","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b37","doi-asserted-by":"crossref","DOI":"10.1016\/j.rico.2022.100121","article-title":"Newton\u2019s method for reinforcement learning and model predictive control","volume":"7","author":"Bertsekas","year":"2022","journal-title":"Results in Control and Optimization"},{"key":"10.1016\/j.arcontrol.2026.101045_b38","article-title":"Neuro-dynamic programming","author":"Bertsekas","year":"1996"},{"key":"10.1016\/j.arcontrol.2026.101045_b39","series-title":"Learning for dynamics and control","first-page":"277","article-title":"Model-predictive control via cross-entropy and gradient-based optimization","author":"Bharadhwaj","year":"2020"},{"key":"10.1016\/j.arcontrol.2026.101045_b40","unstructured":"Bhardwaj, M., Choudhury, S., & Boots, B. (2021). Blending MPC & Value Function Approximation for Efficient Reinforcement Learning. In 9th international conference on learning representations, ICLR."},{"key":"10.1016\/j.arcontrol.2026.101045_b41","series-title":"Online optimization of large scale systems: state of the art","first-page":"295","article-title":"Introduction to Model Based Optimization of Chemical Processes on Moving Horizons","author":"Binder","year":"2001"},{"key":"10.1016\/j.arcontrol.2026.101045_b42","series-title":"Numerical treatment of inverse problems in differential and integral equations","first-page":"95","article-title":"Recent Advances in Parameter Identification Techniques for ODE","author":"Bock","year":"1983"},{"key":"10.1016\/j.arcontrol.2026.101045_b43","series-title":"Real-time and online PDE-constrained optimization","first-page":"3","article-title":"Constrained Optimal Feedback Control of Systems Governed by Large Differential Algebraic Equations","author":"Bock","year":"2007"},{"key":"10.1016\/j.arcontrol.2026.101045_b44","series-title":"Proceedings of the IFAC world congress","first-page":"242","article-title":"A Multiple Shooting Algorithm for Direct Solution of Optimal Control Problems","author":"Bock","year":"1984"},{"key":"10.1016\/j.arcontrol.2026.101045_b45","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2023.106211","article-title":"Optimization of the model predictive control meta-parameters through reinforcement learning","volume":"123","author":"B\u00f8hn","year":"2023","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"10.1016\/j.arcontrol.2026.101045_b46","series-title":"TorchRL: A data-driven decision-making library for PyTorch","author":"Bou","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b47","doi-asserted-by":"crossref","DOI":"10.1016\/j.autcon.2022.104128","article-title":"Comparison of online and offline deep reinforcement learning with model predictive control for thermal energy management","volume":"135","author":"Brandi","year":"2022","journal-title":"Automation in Construction"},{"issue":"10","key":"10.1016\/j.arcontrol.2026.101045_b48","doi-asserted-by":"crossref","first-page":"18808","DOI":"10.1109\/TITS.2022.3160936","article-title":"Learning Interaction-Aware Guidance for Trajectory Optimization in Dense Traffic Scenarios","volume":"23","author":"Brito","year":"2022","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b49","doi-asserted-by":"crossref","first-page":"4616","DOI":"10.1109\/LRA.2021.3068662","article-title":"Where to go Next: Learning a Subgoal Recommendation Policy for Navigation in Dynamic Environments","volume":"6","author":"Brito","year":"2021","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b50","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b51","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","article-title":"Safe Learning in Robotics: From Learning-Based Control to Safe Reinforcement Learning","volume":"5","author":"Brunke","year":"2022","journal-title":"Annual Review of Control, Robotics, and Autonomous Systems"},{"key":"10.1016\/j.arcontrol.2026.101045_b52","unstructured":"Byravan, A., Hasenclever, L., Trochim, P., Mirza, M., Ialongo, A. D., Tassa, J. T., Abdolmaleki, A., Heess, N., Merel, J., & Riedmiller, M. (2022). Evaluating Model-Based Planning and Planner Amortization for Continuous Control. In 10th international conference on learning representations, ICLR."},{"key":"10.1016\/j.arcontrol.2026.101045_b53","doi-asserted-by":"crossref","first-page":"145264","DOI":"10.1109\/ACCESS.2023.3346324","article-title":"A Learning-Based Model Predictive Control Strategy for Home Energy Management Systems","volume":"11","author":"Cai","year":"2023","journal-title":"IEEE Access"},{"key":"10.1016\/j.arcontrol.2026.101045_b54","article-title":"The Scenario Approach to Robust Control Design","author":"Calafiore","year":"2006","journal-title":"Institute of Electrical and Electronics Engineers. Transactions on Automatic Control"},{"issue":"7","key":"10.1016\/j.arcontrol.2026.101045_b55","doi-asserted-by":"crossref","first-page":"4275","DOI":"10.1109\/TAC.2023.3319070","article-title":"State Augmented Constrained Reinforcement Learning: Overcoming the Limitations of Learning With Rewards","volume":"69","author":"Calvo-Fullana","year":"2024","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b56","doi-asserted-by":"crossref","first-page":"11319","DOI":"10.1016\/j.ifacol.2020.12.538","article-title":"Deep Neural Network Approximation of Nonlinear Model Predictive Control","volume":"53","author":"Cao","year":"2020","journal-title":"IFAC-PapersOnLine"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b57","doi-asserted-by":"crossref","first-page":"2897","DOI":"10.1109\/LRA.2020.2974653","article-title":"MPC-Net: A First Principles Guided Policy Search","volume":"5","author":"Carius","year":"2020","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b58","series-title":"2024 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"8002","article-title":"Bird\u2019s-Eye-View Trajectory Planning of Multiple Robots using Continuous Deep Reinforcement Learning and Model Predictive Control","author":"Ceder","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b59","doi-asserted-by":"crossref","DOI":"10.1016\/j.apenergy.2021.117634","article-title":"Model-predictive control and reinforcement learning in multi-energy system case studies","volume":"303","author":"Ceusters","year":"2021","journal-title":"Applied Energy"},{"issue":"10","key":"10.1016\/j.arcontrol.2026.101045_b60","doi-asserted-by":"crossref","first-page":"1205","DOI":"10.1016\/S0005-1098(98)00073-9","article-title":"A Quasi-Infinite Horizon Nonlinear Model Predictive Control Scheme with Guaranteed Stability","volume":"34","author":"Chen","year":"1998","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b61","series-title":"2018 annual American control conference (ACC)","first-page":"1520","article-title":"Approximating Explicit Model Predictive Control Using Constrained Neural Networks","author":"Chen","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b62","first-page":"3387","article-title":"End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks","volume":"Vol. 33","author":"Cheng","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b63","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"Chua","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.arcontrol.2026.101045_b64","series-title":"2022 IEEE 61st conference on decision and control (CDC)","first-page":"5316","article-title":"End-to-End Imitation Learning with Safety Guarantees using Control Barrier Functions","author":"Cosner","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b65","series-title":"IEEE 58th conference on decision and control (CDC)","first-page":"2696","article-title":"Regularized and Distributionally Robust Data-Enabled Predictive Control","author":"Coulson","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b66","series-title":"18th European control conference (ECC)","first-page":"307","article-title":"Data-Enabled Predictive Control: In the Shallows of the DeePC","author":"Coulson","year":"2019"},{"issue":"53","key":"10.1016\/j.arcontrol.2026.101045_b67","first-page":"157","article-title":"V12. 1: User\u2019s Manual for CPLEX","volume":"46","author":"Cplex","year":"2009","journal-title":"International Business Machines Corporation"},{"key":"10.1016\/j.arcontrol.2026.101045_b68","series-title":"The simplex method","author":"Dantzig","year":"1956"},{"key":"10.1016\/j.arcontrol.2026.101045_b69","series-title":"IEEE international conference on robotics and automation (ICRA)","first-page":"879","article-title":"Handling Sparse Rewards in Reinforcement Learning Using Model Predictive Control","author":"Dawood","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b70","series-title":"International conference on robotics and automation (ICRA)","first-page":"7762","article-title":"LVIS: Learning from Value Function Intervals for Contact-Aware Robot Controllers","author":"Deits","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b71","series-title":"IEEE 61st conference on decision and control (CDC)","first-page":"1111","article-title":"Lessons Learned from Data-Driven Building Control Experiments: Contrasting Gaussian Process-based MPC, Bilevel DeePC, and Deep Reinforcement Learning","author":"Di Natale","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b72","series-title":"European control conference (ECC)","first-page":"1","article-title":"Approximate Predictive Control Barrier Functions using Neural Networks: A Computationally Cheap and Permissive Safety Filter","author":"Didier","year":"2023"},{"issue":"18","key":"10.1016\/j.arcontrol.2026.101045_b73","doi-asserted-by":"crossref","first-page":"200","DOI":"10.1016\/j.ifacol.2024.09.031","article-title":"Predictive stability filters for nonlinear dynamical systems affected by disturbances","volume":"58","author":"Didier","year":"2024","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b74","article-title":"Real-Time Optimization for Large Scale Nonlinear Processes","volume":"Vol. 920","author":"Diehl","year":"2002"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b75","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1109\/TAC.2006.890372","article-title":"Formulation of Closed Loop Min-Max MPC as a Quadratically Constrained Quadratic Program","volume":"52","author":"Diehl","year":"2007","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b76","series-title":"Nonlinear model predictive control","first-page":"391","article-title":"Efficient Numerical Methods for Nonlinear MPC and Moving Horizon Estimation","volume":"Vol. 384","author":"Diehl","year":"2009"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b77","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1049\/ip-cta:20040008","article-title":"Nominal Stability of the Real-Time Iteration Scheme for Nonlinear Model Predictive Control","volume":"152","author":"Diehl","year":"2005","journal-title":"IEE Proc.-Control Theory Appl."},{"key":"10.1016\/j.arcontrol.2026.101045_b78","series-title":"Proceedings of the IFAC conference on control systems design","article-title":"Online NMPC of a looping kite using approximate infinite horizon closed loop costing","author":"Diehl","year":"2003"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b79","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1016\/j.arcontrol.2004.01.011","article-title":"Efficient NMPC of unstable periodic systems using approximate infinite horizon closed loop costing","volume":"28","author":"Diehl","year":"2004","journal-title":"Annual Reviews in Control"},{"issue":"10","key":"10.1016\/j.arcontrol.2026.101045_b80","doi-asserted-by":"crossref","first-page":"1545","DOI":"10.1016\/j.ifacol.2022.09.610","article-title":"An experimental study of two predictive reinforcement learning methods and comparison with model-predictive control","volume":"55","author":"Dobriborsci","year":"2022","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b81","series-title":"FORCES Professional","author":"Domahidi","year":"2014"},{"key":"10.1016\/j.arcontrol.2026.101045_b82","series-title":"NeuroMANCER: Neural Modules with Adaptive Nonlinear Constraints and Efficient Regularizations","author":"Drgona","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b83","first-page":"1","article-title":"Learning Constrained Parametric Differentiable Predictive Control Policies With Guarantees","author":"Drgo\u0148a","year":"2024","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics: Systems"},{"issue":"9","key":"10.1016\/j.arcontrol.2026.101045_b84","doi-asserted-by":"crossref","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","article-title":"Challenges of real-world reinforcement learning: definitions, benchmarks and analysis","volume":"110","author":"Dulac-Arnold","year":"2021","journal-title":"Machine Learning"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b85","doi-asserted-by":"crossref","first-page":"769","DOI":"10.1007\/s11081-018-9417-2","article-title":"A software framework for embedded nonlinear model predictive control using a gradient-based augmented Lagrangian approach (GRAMPC)","volume":"20","author":"Englert","year":"2019","journal-title":"Optimization and Engineering"},{"key":"10.1016\/j.arcontrol.2026.101045_b86","series-title":"Machine learning: ECML 2003","first-page":"96","article-title":"Iteratively Extending Time Horizon Reinforcement Learning","author":"Ernst","year":"2003"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b87","doi-asserted-by":"crossref","first-page":"517","DOI":"10.1109\/TSMCB.2008.2007630","article-title":"Reinforcement Learning Versus Model Predictive Control: A Comparison on a Power System Problem","volume":"39","author":"Ernst","year":"2009","journal-title":"IEEE Transactions on Systems, Man and Cybernetics, Part B (Cybernetics)"},{"key":"10.1016\/j.arcontrol.2026.101045_b88","article-title":"Learning-based state estimation and control using MHE and MPC schemes with imperfect models","volume":"73","author":"Esfahani","year":"2023","journal-title":"European Journal of Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b89","series-title":"American control conference (ACC)","first-page":"2121","article-title":"Reinforcement learning based on MPC\/MHE for unmodeled and partially observable dynamics","author":"Esfahani","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b90","series-title":"2021 60th IEEE conference on decision and control (CDC)","first-page":"6326","article-title":"Neural Network Verification in Control","author":"Everett","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b91","unstructured":"Eysenbach, B., & Levine, S. (2022). Maximum Entropy RL (Provably) Solves Some Robust RL Problems.. In 10th international conference on learning representations, ICLR."},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b92","doi-asserted-by":"crossref","first-page":"4004","DOI":"10.1016\/S1474-6670(17)56683-5","article-title":"SPC: Subspace Predictive Control","volume":"32","author":"Favoreel","year":"1999","journal-title":"IFAC Proceedings Volumes"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b93","doi-asserted-by":"crossref","first-page":"6529","DOI":"10.1016\/j.ifacol.2020.12.068","article-title":"Inexact adjoint-based SQP algorithm for real-time stochastic nonlinear MPC","volume":"53","author":"Feng","year":"2020","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b94","series-title":"International conference on machine learning","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"Finn","year":"2017"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b95","doi-asserted-by":"crossref","DOI":"10.1137\/0309035","article-title":"Stochastic Control for Small Noise Intensities","volume":"9","author":"Fleming","year":"1971","journal-title":"SIAM Journal on Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b96","doi-asserted-by":"crossref","unstructured":"Frank, J., Mannor, S., & Precup, D. (2008). Reinforcement learning in the presence of rare events. In Proceedings of the 25th international conference on machine learning (pp. 336\u2013343).","DOI":"10.1145\/1390156.1390199"},{"key":"10.1016\/j.arcontrol.2026.101045_b97","series-title":"Differentiable nonlinear model predictive control","author":"Frey","year":"2025"},{"key":"10.1016\/j.arcontrol.2026.101045_b98","doi-asserted-by":"crossref","unstructured":"Frey, J., Cairano, S. D., & Quirynen, R. (2020). Active-Set based Inexact Interior Point QP Solver for Model Predictive Control. In Proceedings of the IFAC world congress.","DOI":"10.1016\/j.ifacol.2020.12.067"},{"key":"10.1016\/j.arcontrol.2026.101045_b99","doi-asserted-by":"crossref","unstructured":"Frey, J., Gao, Y., Messerer, F., Lahr, A., Zeilinger, M. N., & Diehl, M. (2024). Efficient Zero-Order Robust Optimization for Real-Time Model Predictive Control with acados. In Proceedings of the European control conference (ECC).","DOI":"10.23919\/ECC64448.2024.10591148"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b100","doi-asserted-by":"crossref","first-page":"6563","DOI":"10.1016\/j.ifacol.2020.12.073","article-title":"HPIPM: a high-performance quadratic programming framework for model predictive control","volume":"Vol. 53","author":"Frison","year":"2020","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b101","series-title":"IFAC","first-page":"14399","article-title":"A high-performance Riccati based solver for tree-structured quadratic programs","volume":"50","author":"Frison","year":"2017"},{"key":"10.1016\/j.arcontrol.2026.101045_b102","series-title":"Learning Robust Rewards with Adversarial Inverse Reinforcement Learning","author":"Fu","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b103","series-title":"Proceedings of the 35th international conference on machine learning, ICML 2018","first-page":"1582","article-title":"Addressing function approximation error in actor-critic methods","volume":"80","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b104","volume":"Vol. 7","author":"Gauss","year":"1809"},{"key":"10.1016\/j.arcontrol.2026.101045_b105","series-title":"62nd IEEE conference on decision and control (CDC)","first-page":"4766","article-title":"Imitation Learning from Nonlinear MPC via the Exact Q-Loss and its Gauss-Newton Approximation","author":"Ghezzi","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b106","series-title":"A numerically efficient method to enhance model predictive control performance with a reinforcement learning policy","author":"Ghezzi","year":"2025"},{"issue":"4","key":"10.1016\/j.arcontrol.2026.101045_b107","doi-asserted-by":"crossref","first-page":"979","DOI":"10.1137\/S1052623499350013","article-title":"SNOPT: An SQP Algorithm for Large-Scale Constrained Optimization","volume":"12","author":"Gill","year":"2002","journal-title":"SIAM Journal on Optimization"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b108","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1109\/MCS.2018.2876958","article-title":"AutoRally: An Open Platform for Aggressive Autonomous Driving","volume":"39","author":"Goldfain","year":"2019","journal-title":"IEEE Control Systems Magazine"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b109","doi-asserted-by":"crossref","first-page":"4920","DOI":"10.1016\/j.ifacol.2017.08.747","article-title":"Relations between Model Predictive Control and Reinforcement Learning","volume":"50","author":"G\u00f6rges","year":"2017","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b110","doi-asserted-by":"crossref","first-page":"523","DOI":"10.1016\/j.automatica.2005.08.023","article-title":"Optimization over state feedback policies for robust control with constraints","volume":"42","author":"Goulart","year":"2006","journal-title":"Automatica"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b111","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1007\/s10107-007-0096-6","article-title":"Efficient robust optimization for robust control with constraints","volume":"114","author":"Goulart","year":"2008","journal-title":"Mathematical Programming"},{"key":"10.1016\/j.arcontrol.2026.101045_b112","series-title":"Lecture notes in control and information sciences","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-28780-0_2","article-title":"Explicit Nonlinear Model Predictive Control: Theory and Applications","author":"Grancharova","year":"2012"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b113","doi-asserted-by":"crossref","first-page":"3318","DOI":"10.1109\/LRA.2023.3266985","article-title":"CACTO: Continuous Actor-Critic With Trajectory Optimization\u2014Towards Global Optimality","volume":"8","author":"Grandesso","year":"2023","journal-title":"IEEE Robotics and Automation Letters"},{"issue":"7","key":"10.1016\/j.arcontrol.2026.101045_b114","doi-asserted-by":"crossref","first-page":"1681","DOI":"10.1007\/s10514-019-09829-4","article-title":"Reinforcement learning and model predictive control for robust embedded quadrotor guidance and control","volume":"43","author":"Greatwood","year":"2019","journal-title":"Autonomous Robots"},{"key":"10.1016\/j.arcontrol.2026.101045_b115","series-title":"Towards Safe Reinforcement Learning Using NMPC and Policy Gradients: Part I - Stochastic case","author":"Gros","year":"2019"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b116","doi-asserted-by":"crossref","first-page":"636","DOI":"10.1109\/TAC.2019.2913768","article-title":"Data-Driven Economic NMPC Using Reinforcement Learning","volume":"65","author":"Gros","year":"2020","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b117","doi-asserted-by":"crossref","first-page":"5219","DOI":"10.1016\/j.ifacol.2020.12.1196","article-title":"Reinforcement Learning for mixed-integer problems based on MPC","volume":"53","author":"Gros","year":"2020","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b118","series-title":"American control conference (ACC)","first-page":"1947","article-title":"Reinforcement Learning based on MPC and the Stochastic Policy Gradient Method","author":"Gros","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b119","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2022.110602","article-title":"Economic MPC of Markov Decision Processes: Dissipativity in undiscounted infinite-horizon optimal control","volume":"146","author":"Gros","year":"2022","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b120","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2022.110598","article-title":"Learning for MPC with stability & safety guarantees","volume":"146","author":"Gros","year":"2022","journal-title":"Automatica"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b121","doi-asserted-by":"crossref","first-page":"8076","DOI":"10.1016\/j.ifacol.2020.12.2276","article-title":"Safe reinforcement learning via projection on a safe set: How to achieve optimality?","volume":"53","author":"Gros","year":"2020","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b122","article-title":"From Linear to Nonlinear MPC: bridging the gap via the Real-Time Iteration","author":"Gros","year":"2016","journal-title":"International Journal of Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b123","series-title":"International conference on machine learning","first-page":"7825","article-title":"Mirror learning: A unifying framework of policy optimisation","author":"Grudzien","year":"2022"},{"issue":"17","key":"10.1016\/j.arcontrol.2026.101045_b124","doi-asserted-by":"crossref","first-page":"1","DOI":"10.3182\/20120823-5-NL-3013.00030","article-title":"NMPC without terminal constraints","volume":"45","author":"Gr\u00fcne","year":"2012","journal-title":"IFAC Proceedings Volumes"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b125","doi-asserted-by":"crossref","first-page":"725","DOI":"10.1016\/j.automatica.2012.12.003","article-title":"Economic receding horizon control without terminal constraints","volume":"49","author":"Gr\u00fcne","year":"2013","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b126","series-title":"Nonlinear model predictive control. theory and algorithms","author":"Gr\u00fcne","year":"2017"},{"issue":"9","key":"10.1016\/j.arcontrol.2026.101045_b127","article-title":"On the infinite horizon performance of receding horizon controllers","volume":"53","author":"Gr\u00fcne","year":"2008","journal-title":"Institute of Electrical and Electronics Engineers. Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b128","series-title":"Gurobi Optimizer Reference Manual","author":"Gurobi Optimization, LLC","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b129","series-title":"Proceedings of the 35th international conference on machine learning, ICML","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume":"Vol. 80","author":"Haarnoja","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b130","series-title":"Soft Actor-Critic Algorithms and Applications","author":"Haarnoja","year":"2019"},{"issue":"12","key":"10.1016\/j.arcontrol.2026.101045_b131","article-title":"An Efficient Method to Estimate the Suboptimality of Affine Controllers","volume":"56","author":"Hadjiyiannis","year":"2011","journal-title":"Institute of Electrical and Electronics Engineers. Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b132","series-title":"Proceedings of the 39th international conference on machine learning","first-page":"8387","article-title":"Temporal difference learning for model predictive control","volume":"Vol. 162","author":"Hansen","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b133","unstructured":"Hansen, N., Su, H., & Wang, X. (2024). TD-MPC2: Scalable, Robust World Models for Continuous Control. In International conference on learning representations."},{"key":"10.1016\/j.arcontrol.2026.101045_b134","series-title":"IEEE conference on control technology and applications (CCTA)","first-page":"301","article-title":"Comparison of Deep Reinforcement Learning and Model Predictive Control for Real-Time Depth Optimization of a Lifting Surface Controlled Ocean Current Turbine","author":"Hasankhani","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b135","series-title":"2021 IEEE international conference on robotics and automation (ICRA)","first-page":"7372","article-title":"The Value of Planning for Infinite-Horizon Model Predictive Control","author":"Hatch","year":"2021"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b136","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1109\/LCSYS.2018.2843682","article-title":"Learning an Approximate Model Predictive Controller With Guarantees","volume":"2","author":"Hertneck","year":"2018","journal-title":"IEEE Control Systems Letters"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b137","doi-asserted-by":"crossref","first-page":"2736","DOI":"10.1109\/TCST.2019.2949757","article-title":"Cautious Model Predictive Control Using Gaussian Process Regression","volume":"28","author":"Hewing","year":"2020","journal-title":"IEEE Transactions on Control Systems Technology"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b138","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1146\/annurev-control-090419-075625","article-title":"Learning-based model predictive control: Toward safe learning in control","volume":"3","author":"Hewing","year":"2020","journal-title":"Annual Review of Control, Robotics, and Autonomous Systems"},{"key":"10.1016\/j.arcontrol.2026.101045_b139","article-title":"Generative Adversarial Imitation Learning","volume":"Vol. 29","author":"Ho","year":"2016"},{"key":"10.1016\/j.arcontrol.2026.101045_b140","series-title":"Proceedings of the conference on robot learning","first-page":"990","article-title":"Deep Value Model Predictive Control","author":"Hoeller","year":"2020"},{"key":"10.1016\/j.arcontrol.2026.101045_b141","series-title":"Proceedings of the 6th annual learning for dynamics & control conference","first-page":"1214","article-title":"PlanNetX: Learning an efficient neural network planner from MPC for longitudinal control","author":"Hoffmann","year":"2024"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b142","doi-asserted-by":"crossref","first-page":"19","DOI":"10.20485\/jsaeijae.15.1_19","article-title":"Comparison of Reinforcement Learning and Model Predictive Control for Automated Generation of Optimal Control for Dynamic Systems within a Design Space Exploration Framework","volume":"15","author":"Hoffmann","year":"2024","journal-title":"International Journal of Automotive Engineering"},{"key":"10.1016\/j.arcontrol.2026.101045_b143","series-title":"Robust optimization of dynamic systems","author":"Houska","year":"2011"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b144","doi-asserted-by":"crossref","first-page":"298","DOI":"10.1002\/oca.939","article-title":"ACADO toolkit\u2014An open-source framework for automatic control and dynamic optimization","volume":"32","author":"Houska","year":"2011","journal-title":"Optimal Control Applications and Methods"},{"key":"10.1016\/j.arcontrol.2026.101045_b145","series-title":"9th international conference on control, decision and information technologies (coDIT)","first-page":"989","article-title":"Comparison of Traffic Control with Model Predictive Control and Deep Reinforcement Learning","author":"Imran","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b146","series-title":"N-MPC for Deep Neural Network-Based Collision Avoidance exploiting Depth Images","author":"Jacquet","year":"2024"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b147","doi-asserted-by":"crossref","first-page":"3395","DOI":"10.1109\/TRO.2022.3186804","article-title":"TAMOLS: Terrain-Aware Motion Optimization for Legged Systems","volume":"38","author":"Jenelten","year":"2022","journal-title":"IEEE Transactions on Robotics"},{"issue":"86","key":"10.1016\/j.arcontrol.2026.101045_b148","doi-asserted-by":"crossref","first-page":"eadh5401","DOI":"10.1126\/scirobotics.adh5401","article-title":"DTC: Deep Tracking Control","volume":"9","author":"Jenelten","year":"2024","journal-title":"Science Robotics"},{"issue":"12","key":"10.1016\/j.arcontrol.2026.101045_b149","doi-asserted-by":"crossref","first-page":"3238","DOI":"10.1109\/TAC.2014.2351991","article-title":"Embedded online optimization for model predictive control at megahertz rates","volume":"59","author":"Jerez","year":"2014","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b150","doi-asserted-by":"crossref","first-page":"810","DOI":"10.1109\/TAC.2003.811259","article-title":"Approximate explicit constrained linear model predictive control via orthogonal search tree","volume":"48","author":"Johansen","year":"2003","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b151","first-page":"14291","article-title":"Deep Inverse Q-learning with Constraints","volume":"Vol. 33","author":"Kalweit","year":"2020"},{"issue":"10","key":"10.1016\/j.arcontrol.2026.101045_b152","doi-asserted-by":"crossref","first-page":"6619","DOI":"10.1109\/LRA.2023.3307008","article-title":"RL + Model-Based Control: Using On-Demand Optimal Control to Learn Versatile Legged Locomotion","volume":"8","author":"Kang","year":"2023","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b153","series-title":"American control conference (ACC)","first-page":"1246","article-title":"Decomposition via ADMM for scenario-based model predictive control","author":"Kang","year":"2015"},{"key":"10.1016\/j.arcontrol.2026.101045_b154","doi-asserted-by":"crossref","first-page":"263","DOI":"10.1007\/978-3-642-01094-1_21","article-title":"Sequential Monte Carlo for model predictive control","author":"Kantas","year":"2009","journal-title":"Nonlinear Model Predictive Control: Towards New Challenging Applications"},{"issue":"20","key":"10.1016\/j.arcontrol.2026.101045_b155","doi-asserted-by":"crossref","DOI":"10.1103\/PhysRevLett.95.200201","article-title":"Linear Theory for Control of Nonlinear Stochastic Systems","volume":"95","author":"Kappen","year":"2005","journal-title":"Physical Review Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b156","doi-asserted-by":"crossref","unstructured":"Karapetyan, A., Balta, E. C., Iannelli, A., & Lygeros, J. (2023). On the Finite-Time Behavior of Suboptimal Linear Model Predictive Control. In Proceedings of the IEEE conference on decision and control (CDC).","DOI":"10.1109\/CDC49753.2023.10383607"},{"issue":"9","key":"10.1016\/j.arcontrol.2026.101045_b157","doi-asserted-by":"crossref","first-page":"3866","DOI":"10.1109\/TCYB.2020.2999556","article-title":"Efficient Representation and Approximation of Model Predictive Control Laws via Deep Learning","volume":"50","author":"Karg","year":"2020","journal-title":"IEEE Transactions on Cybernetics"},{"key":"10.1016\/j.arcontrol.2026.101045_b158","series-title":"Proceedings of the 2nd conference on learning for dynamics and control","first-page":"211","article-title":"Practical Reinforcement Learning For MPC: Learning from sparse objectives in under an hour on a real robot","author":"Karnchanachari","year":"2020"},{"issue":"7976","key":"10.1016\/j.arcontrol.2026.101045_b159","doi-asserted-by":"crossref","first-page":"982","DOI":"10.1038\/s41586-023-06419-4","article-title":"Champion-level drone racing using deep reinforcement learning","volume":"620","author":"Kaufmann","year":"2023","journal-title":"Nature"},{"key":"10.1016\/j.arcontrol.2026.101045_b160","series-title":"Robotics: science and systems XVI","article-title":"Deep Drone Acrobatics","author":"Kaufmann","year":"2020"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b161","doi-asserted-by":"crossref","first-page":"900","DOI":"10.1109\/LRA.2016.2528294","article-title":"Design Principles for a Family of Direct-Drive Legged Robots","volume":"1","author":"Kenneally","year":"2016","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b162","series-title":"Robust constraint satisfaction: Invariant sets and predictive control","author":"Kerrigan","year":"2000"},{"key":"10.1016\/j.arcontrol.2026.101045_b163","series-title":"CDC","first-page":"3675","article-title":"An improved dual Newton strategy for scenario-tree MPC","author":"Klintberg","year":"2016"},{"issue":"7","key":"10.1016\/j.arcontrol.2026.101045_b164","doi-asserted-by":"crossref","first-page":"855","DOI":"10.1177\/0278364912444543","article-title":"Cross-entropy motion planning","volume":"31","author":"Kobilarov","year":"2012","journal-title":"The International Journal of Robotics Research"},{"key":"10.1016\/j.arcontrol.2026.101045_b165","series-title":"2021 American control conference (ACC)","first-page":"1985","article-title":"Reinforcement Learning based on Scenario-tree MPC for ASVs","author":"Kordabad","year":"2021"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b166","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1016\/j.ifacol.2021.08.562","article-title":"Verification of Dissipativity and Evaluation of Storage Function in Economic Nonlinear MPC using Q-Learning","volume":"54","author":"Kordabad","year":"2021","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b167","series-title":"2023 European control conference (ECC)","first-page":"1","article-title":"Bias Correction of Discounted Optimal Steady-State using Cost Modification","author":"Kordabad","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b168","series-title":"2021 European control conference (ECC)","first-page":"1086","article-title":"Bias Correction in Deterministic Policy Gradient Using Robust MPC","author":"Kordabad","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b169","doi-asserted-by":"crossref","first-page":"130058","DOI":"10.1109\/ACCESS.2022.3228922","article-title":"Safe Reinforcement Learning Using Wasserstein Distributionally Robust MPC and Chance Constraint","volume":"10","author":"Kordabad","year":"2022","journal-title":"IEEE Access"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b170","doi-asserted-by":"crossref","first-page":"1149","DOI":"10.1109\/TAC.2023.3277309","article-title":"Equivalence of Optimality Criteria for Markov Decision Process and Model Predictive Control","volume":"69","author":"Kordabad","year":"2024","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b171","series-title":"Model predictive control: Classical, robust and stochastic","author":"Kouvaritakis","year":"2015"},{"key":"10.1016\/j.arcontrol.2026.101045_b172","series-title":"Structure-exploiting numerical methods for tree-sparse optimal control problems","author":"Kouzoupis","year":"2019"},{"issue":"4","key":"10.1016\/j.arcontrol.2026.101045_b173","doi-asserted-by":"crossref","first-page":"863","DOI":"10.1007\/s10013-018-0311-1","article-title":"Recent advances in quadratic programming algorithms for nonlinear model predictive control","volume":"46","author":"Kouzoupis","year":"2018","journal-title":"Vietnam Journal of Mathematics"},{"key":"10.1016\/j.arcontrol.2026.101045_b174","doi-asserted-by":"crossref","DOI":"10.1002\/rnc.4503","article-title":"A dual Newton strategy for tree-sparse quadratic programs and its implementation in the open-source software treeQP","author":"Kouzoupis","year":"2019","journal-title":"International Jounal of Robust and Nonlinear Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b175","series-title":"Robotics: science and systems XX","article-title":"MPCC++: Model Predictive Contouring Control for Time-Optimal Flight with Safety Constraints","author":"Krinner","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b176","series-title":"Ellipsoidal calculus for estimation and control","author":"Kurzhanski","year":"1997"},{"key":"10.1016\/j.arcontrol.2026.101045_b177","series-title":"L4acados: Learning-based models for acados, applied to Gaussian process-based predictive control","author":"Lahr","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b178","doi-asserted-by":"crossref","first-page":"4224","DOI":"10.1109\/LRA.2019.2930489","article-title":"Low-Level Control of a Quadrotor With Deep Model-Based Reinforcement Learning","volume":"4","author":"Lambert","year":"2019","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b179","article-title":"Recurrent networks, hidden states and beliefs in partially observable environments","author":"Lambrechts","year":"2022","journal-title":"Transactions on Machine Learning Research"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b180","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1016\/j.automatica.2003.08.009","article-title":"Robust model predictive control using tubes","volume":"40","author":"Langson","year":"2004","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b181","series-title":"Neural networks: tricks of the trade","first-page":"9","article-title":"Efficient BackProp","author":"LeCun","year":"1998"},{"key":"10.1016\/j.arcontrol.2026.101045_b182","doi-asserted-by":"crossref","unstructured":"Leeman, A. P., K\u00f6hler, J., Messerer, F., Lahr, A., Diehl, M., & Zeilinger, M. N. (2024). Fast System Level Synthesis: Robust Model Predictive Control using Riccati Recursions. In Proceedings of the IFAC conference on nonlinear model predictive control (NMPC).","DOI":"10.1016\/j.ifacol.2024.09.027"},{"issue":"39","key":"10.1016\/j.arcontrol.2026.101045_b183","first-page":"1","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.arcontrol.2026.101045_b184","series-title":"Proceedings of the 30th international conference on machine learning","first-page":"1","article-title":"Guided Policy Search","author":"Levine","year":"2013"},{"key":"10.1016\/j.arcontrol.2026.101045_b185","article-title":"Variational Policy Search via Trajectory Optimization","volume":"Vol. 26","author":"Levine","year":"2013"},{"key":"10.1016\/j.arcontrol.2026.101045_b186","series-title":"2020 IEEE international conference on robotics and automation (ICRA)","first-page":"7166","article-title":"Robust Model Predictive Shielding for Safe Reinforcement Learning with Stochastic Dynamics","author":"Li","year":"2020"},{"key":"10.1016\/j.arcontrol.2026.101045_b187","doi-asserted-by":"crossref","unstructured":"Li, Y., Karapetyan, A., Lygeros, J., Johansson, K. H., & M U\u016eartensson, J. (2023). Performance Bounds of Model Predictive Control for Unconstrained and Constrained Linear Quadratic Problems and Beyond. In Proceedings of the IFAC World Congress.","DOI":"10.1016\/j.ifacol.2023.10.1133"},{"key":"10.1016\/j.arcontrol.2026.101045_b188","unstructured":"Li, W., & Todorov, E. (2004). Iterative Linear Quadratic Regulator Design for Nonlinear Biological Movement Systems. In Proceedings of the 1st international conference on informatics in control, automation and robotics."},{"key":"10.1016\/j.arcontrol.2026.101045_b189","series-title":"IEEE international conference on robotics and automation (ICRA)","first-page":"946","article-title":"Enforcing the consensus between Trajectory Optimization and Policy Learning for precise robot control","author":"Lidec","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b190","series-title":"4th international conference on learning representations, ICLR","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2016"},{"key":"10.1016\/j.arcontrol.2026.101045_b191","series-title":"Nonlinear model predictive control. lecture notes in control and information sciences","article-title":"Input-to-State Stability: A Unifying Framework for Robust Model Predictive Control","volume":"vol. 384","author":"Limon","year":"2009"},{"key":"10.1016\/j.arcontrol.2026.101045_b192","article-title":"Bounded-Regret MPC via Perturbation Analysis: Prediction Error, Constraints, and Nonlinearity","author":"Lin","year":"2022","journal-title":"NeurIPS"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b193","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1109\/TIV.2020.3012947","article-title":"Comparison of Deep Reinforcement Learning and Model Predictive Control for Adaptive Cruise Control","volume":"6","author":"Lin","year":"2021","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b194","doi-asserted-by":"crossref","first-page":"3312","DOI":"10.1109\/TNNLS.2023.3273590","article-title":"Reinforcement Learning-Based Model Predictive Control for Discrete-Time Systems","volume":"35","author":"Lin","year":"2024","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"issue":"7","key":"10.1016\/j.arcontrol.2026.101045_b195","doi-asserted-by":"crossref","first-page":"4139","DOI":"10.1109\/LRA.2023.3280809","article-title":"Learning to Play Trajectory Games Against Opponents With Unknown Objectives","volume":"8","author":"Liu","year":"2023","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b196","series-title":"System identification: Theory for the user","author":"Ljung","year":"1999"},{"key":"10.1016\/j.arcontrol.2026.101045_b197","unstructured":"Lowrey, K., Rajeswaran, A., Kakade, S., Todorov, E., & Mordatch, I. (2019). Plan Online, Learn Offline: Efficient Learning and Exploration via Model-Based Control. In 7th international conference on learning representations."},{"key":"10.1016\/j.arcontrol.2026.101045_b198","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1016\/S0167-6911(97)00079-0","article-title":"Stability margins of nonlinear receding-horizon control via inverse optimality","volume":"32","author":"Magni","year":"1997","journal-title":"Systems & Control Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b199","series-title":"2024 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"3613","article-title":"Safe Imitation Learning of Nonlinear Model Predictive Control for Flexible Robots","author":"Mamedov","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b200","series-title":"2020 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"7637","article-title":"Squash-box feasibility driven differential dynamic programming","author":"Marti-Saumell","year":"2020"},{"key":"10.1016\/j.arcontrol.2026.101045_b201","doi-asserted-by":"crossref","unstructured":"Mastalli, C., Budhiraja, R., Merkt, W., Saurel, G., Hammoud, B., Naveau, M., Carpentier, J., Vijayakumar, S., & Mansard, N. (2020). Crocoddyl: An Efficient and Versatile Framework for Multi-Contact Optimal Control. In ICRA 2020 IEEE international conference on robotics and automation. Paris \/ Virtual, France.","DOI":"10.1109\/ICRA40945.2020.9196673"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b202","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1080\/00207176608921369","article-title":"A Second-order Gradient Method for Determining Optimal Trajectories of Non-linear Discrete-time Systems","volume":"3","author":"Mayne","year":"1966","journal-title":"International Journal of Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b203","article-title":"An apologia for stabilising terminal conditions in model predictive control","volume":"11","author":"Mayne","year":"2013","journal-title":"International Journal of Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b204","doi-asserted-by":"crossref","first-page":"1341","DOI":"10.1002\/rnc.1758","article-title":"Tube-based robust nonlinear model predictive control","volume":"21","author":"Mayne","year":"2011","journal-title":"International Journal of Robust and Nonlinear Control"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b205","doi-asserted-by":"crossref","first-page":"789","DOI":"10.1016\/S0005-1098(99)00214-9","article-title":"Constrained model predictive control: Stability and optimality","volume":"26","author":"Mayne","year":"2000","journal-title":"Automatica"},{"issue":"10","key":"10.1016\/j.arcontrol.2026.101045_b206","article-title":"Inherent Stochastic Robustness of Model Predictive Control to Large and Infrequent Disturbances","volume":"67","author":"McAllister","year":"2022","journal-title":"Institute of Electrical and Electronics Engineers. Transactions on Automatic Control"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b207","article-title":"On the Inherent Distributional Robustness of Stochastic and Nominal Model Predictive Control","volume":"69","author":"McAllister","year":"2024","journal-title":"Institute of Electrical and Electronics Engineers. Transactions on Automatic Control"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b208","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/MCS.2016.2602087","article-title":"Stochastic Model Predictive Control: An Overview and Perspectives for Future Research","volume":"36","author":"Mesbah","year":"2016","journal-title":"IEEE Control Systems Magazine"},{"key":"10.1016\/j.arcontrol.2026.101045_b209","series-title":"American control conference (ACC)","first-page":"342","article-title":"Fusion of Machine Learning and MPC under Uncertainty: What Advances Are on the Horizon?","author":"Mesbah","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b210","series-title":"Fourth-order suboptimality of nominal model predictive control in the presence of uncertainty","author":"Messerer","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b211","doi-asserted-by":"crossref","unstructured":"Messerer, F., & Diehl, M. (2021). An Efficient Algorithm for Tube-based Robust Nonlinear Optimal Control with Optimal Linear Feedback. In Proceedings of the IEEE conference on decision and control (CDC).","DOI":"10.1109\/CDC45484.2021.9683712"},{"issue":"7540","key":"10.1016\/j.arcontrol.2026.101045_b212","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nat."},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b213","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2200000086","article-title":"Model-based reinforcement learning: A survey","volume":"16","author":"Moerland","year":"2023","journal-title":"Found. Trends Mach. Learn."},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b214","doi-asserted-by":"crossref","first-page":"276","DOI":"10.3390\/make4010013","article-title":"Robust Reinforcement Learning: A Review of Foundations and Recent Advances","volume":"4","author":"Moos","year":"2022","journal-title":"Machine Learning and Knowledge Extraction"},{"key":"10.1016\/j.arcontrol.2026.101045_b215","doi-asserted-by":"crossref","first-page":"81177","DOI":"10.1109\/ACCESS.2022.3195530","article-title":"Model Predictive Control-Based Reinforcement Learning Using Expected Sarsa","volume":"10","author":"Moradimaryamnegari","year":"2022","journal-title":"IEEE Access"},{"issue":"4","key":"10.1016\/j.arcontrol.2026.101045_b216","doi-asserted-by":"crossref","first-page":"667","DOI":"10.1016\/S0098-1354(98)00301-9","article-title":"Model predictive control: past, present and future","volume":"23","author":"Morari","year":"1999","journal-title":"Computers & Chemical Engineering"},{"key":"10.1016\/j.arcontrol.2026.101045_b217","doi-asserted-by":"crossref","DOI":"10.1016\/j.compag.2023.108372","article-title":"Reinforcement Learning versus Model Predictive Control on greenhouse climate control","volume":"215","author":"Morcego","year":"2023","journal-title":"Computers and Electronics in Agriculture"},{"key":"10.1016\/j.arcontrol.2026.101045_b218","volume":"Vol. 10","author":"Mordatch","year":"2014"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b219","doi-asserted-by":"crossref","first-page":"3874","DOI":"10.1016\/j.ifacol.2023.10.1320","article-title":"Predictive Control with Learning-Based Terminal Costs Using Approximate Value Iteration","volume":"56","author":"Moreno-Mora","year":"2023","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b220","series-title":"2021 IEEE international conference on robotics and automation (ICRA)","first-page":"6672","article-title":"Model Predictive Actor-Critic: Accelerating Robot Skill Acquisition with Deep Reinforcement Learning","author":"Morgan","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b221","article-title":"Robust Reinforcement Learning","volume":"Vol. 13","author":"Morimoto","year":"2000"},{"key":"10.1016\/j.arcontrol.2026.101045_b222","series-title":"Essentially Sharp Estimates on the Entropy Regularization Error in Discounted Markov Decision Processes","author":"M\u00fcller","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b223","series-title":"IEEE international conference on robotics and automation (ICRA)","first-page":"7559","article-title":"Neural network dynamics for model-based deep reinforcement learning with model-free fine-tuning","author":"Nagabandi","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b224","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1016\/j.jprocont.2003.07.004","article-title":"Open-loop and closed-loop robust optimal control of batch processes using distributional and worst-case analysis","volume":"14","author":"Nagy","year":"2004","journal-title":"Journal of Process Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b225","doi-asserted-by":"crossref","unstructured":"Nasvytis, L., Sandbrink, K., Foerster, J., Franzmeyer, T., & Schroeder de Witt, C. (2024). Rethinking Out-of-Distribution Detection for Reinforcement Learning: Advancing Methods for Evaluation and Detection. In Proceedings of the 23rd international conference on autonomous agents and multiagent systems (pp. 1445\u20131453).","DOI":"10.65109\/XVVY9199"},{"key":"10.1016\/j.arcontrol.2026.101045_b226","series-title":"Proceedings of the 28th international conference on international conference on machine learning","first-page":"817","article-title":"Variational inference for policy search in changing situations","author":"Neumann","year":"2011"},{"key":"10.1016\/j.arcontrol.2026.101045_b227","unstructured":"Nicolao, G. D., Magni, L., & Scattolini, R. (1996a). Stabilizing nonlinear receding horizon control via a nonquadratic terminal state penalty. In Symposium on control, optimization and supervision, CESA\u201996 IMACS multiconference (pp. 185\u2013187). Lille."},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b228","article-title":"On the Robustness of Receding-Horizon Control with Terminal Constraints","volume":"41","author":"Nicolao","year":"1996","journal-title":"Institute of Electrical and Electronics Engineers. Transactions on Automatic Control"},{"issue":"7","key":"10.1016\/j.arcontrol.2026.101045_b229","doi-asserted-by":"crossref","first-page":"1030","DOI":"10.1109\/9.701133","article-title":"Stabilizing Receding-Horizon control of nonlinear time varying systems","volume":"AC-43","author":"Nicolao","year":"1998","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b230","first-page":"7886","article-title":"Control-oriented model-based reinforcement learning with implicit differentiation","volume":"Vol. 36","author":"Nikishin","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b231","series-title":"Springer series in operations research and financial engineering","article-title":"Numerical Optimization","author":"Nocedal","year":"2006"},{"key":"10.1016\/j.arcontrol.2026.101045_b232","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2023.105878","article-title":"Integrating Machine Learning and Model Predictive Control for automotive applications: A review and future directions","volume":"120","author":"Norouzi","year":"2023","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"10.1016\/j.arcontrol.2026.101045_b233","series-title":"Numerical methods for optimal control of nonsmooth dynamical systems","author":"Nurkanovi\u0107","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b234","doi-asserted-by":"crossref","DOI":"10.1016\/j.compchemeng.2023.108558","article-title":"Quantitative comparison of reinforcement learning and data-driven model predictive control for chemical and biological processes","volume":"181","author":"Oh","year":"2024","journal-title":"Computers & Chemical Engineering"},{"issue":"9","key":"10.1016\/j.arcontrol.2026.101045_b235","doi-asserted-by":"crossref","first-page":"747","DOI":"10.1016\/j.sysconle.2011.05.013","article-title":"Conditions under which suboptimal nonlinear MPC is inherently robust","volume":"60","author":"Pannocchia","year":"2011","journal-title":"Systems & Control Letters"},{"issue":"10","key":"10.1016\/j.arcontrol.2026.101045_b236","doi-asserted-by":"crossref","first-page":"1443","DOI":"10.1016\/0005-1098(95)00044-W","article-title":"A receding-horizon regulator for nonlinear systems and a neural approximation","volume":"31","author":"Parisini","year":"1995","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b237","series-title":"Algorithmic foundations of robotics XI: selected contributions of the eleventh international workshop on the algorithmic foundations of robotics","first-page":"515","article-title":"Scaling up Gaussian Belief Space Planning Through Covariance-Free Trajectory Optimization and Automatic Differentiation","author":"Patil","year":"2015"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b238","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/TAC.2013.2275667","article-title":"An accelerated dual gradient-projection algorithm for embedded linear model predictive control","volume":"59","author":"Patrinos","year":"2013","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b239","series-title":"Proceedings of the 4th annual learning for dynamics and control conference","first-page":"291","article-title":"Safe Reinforcement Learning with Chance-constrained Model Predictive Control","author":"Pfrommer","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b240","series-title":"Probabilistic Planning with Sequential Monte Carlo methods","author":"Piche","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b241","first-page":"3801","article-title":"Theseus: A library for differentiable nonlinear optimization","volume":"Vol.35","author":"Pineda","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b242","unstructured":"Pinneri, C., Sawant, S., Blaes, S., & Martius, G. (2021). Extracting Strong Policies for Robotics Tasks from Zero-Order Trajectory Optimizers. In International conference on learning representations."},{"key":"10.1016\/j.arcontrol.2026.101045_b243","series-title":"2009 IEEE international conference on robotics and automation","first-page":"94","article-title":"Randomized model predictive control for robot navigation","author":"Piovesan","year":"2009"},{"key":"10.1016\/j.arcontrol.2026.101045_b244","series-title":"Approximate dynamic programming: Solving the curses of dimensionality","author":"Powell","year":"2007"},{"key":"10.1016\/j.arcontrol.2026.101045_b245","series-title":"Wiley series in probability and statistics","article-title":"Markov decision processes: discrete stochastic dynamic programming","author":"Puterman","year":"2005"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b246","doi-asserted-by":"crossref","first-page":"3605","DOI":"10.1109\/TIV.2023.3348134","article-title":"RL-Driven MPPI: Accelerating Online Control Laws Calculation With Offline Policy","volume":"9","author":"Qu","year":"2024","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"key":"10.1016\/j.arcontrol.2026.101045_b247","series-title":"Encyclopedia of systems and control","first-page":"1","article-title":"Robust Model Predictive Control","author":"Rakovi\u0107","year":"2019"},{"issue":"11","key":"10.1016\/j.arcontrol.2026.101045_b248","doi-asserted-by":"crossref","first-page":"2746","DOI":"10.1109\/TAC.2012.2191174","article-title":"Parameterized Tube Model Predictive Control","volume":"57","author":"Rakovic","year":"2012","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b249","first-page":"497","article-title":"A survey of numerical methods for optimal control","volume":"135","author":"Rao","year":"2009","journal-title":"Advances in the Astronautical Sciences"},{"key":"10.1016\/j.arcontrol.2026.101045_b250","doi-asserted-by":"crossref","first-page":"723","DOI":"10.1023\/A:1021711402723","article-title":"Application of Interior-Point Methods to Model Predictive Control","volume":"99","author":"Rao","year":"1998","journal-title":"Journal of Optimization Theory and Applications"},{"key":"10.1016\/j.arcontrol.2026.101045_b251","unstructured":"Rashid, T., Peng, B., B\u00f6hmer, W., & Whiteson, S. (2020). Optimistic Exploration even with a Pessimistic Initialisation. In 8th international conference on learning representations, ICLR."},{"key":"10.1016\/j.arcontrol.2026.101045_b252","series-title":"Model predictive control: Theory, computation, and design","author":"Rawlings","year":"2017"},{"key":"10.1016\/j.arcontrol.2026.101045_b253","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2016.12.024","article-title":"Model predictive control with discrete actuators: Theory and application","volume":"78","author":"Rawlings","year":"2017","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b254","series-title":"Benchmarking Safe Exploration in Deep Reinforcement Learning","author":"Ray","year":"2019"},{"issue":"Volume 2","key":"10.1016\/j.arcontrol.2026.101045_b255","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1146\/annurev-control-053018-023825","article-title":"A Tour of Reinforcement Learning: The View from Continuous Control","volume":"2","author":"Recht","year":"2019","journal-title":"Annual Review of Control, Robotics, and Autonomous Systems"},{"key":"10.1016\/j.arcontrol.2026.101045_b256","series-title":"Economic model predictive control as a solution to markov decision processes","author":"Reinhardt","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b257","first-page":"1","article-title":"AC4MPC: Actor-critic reinforcement learning for guiding model predictive control","author":"Reiter","year":"2025","journal-title":"IEEE Transactions on Control Systems Technology"},{"key":"10.1016\/j.arcontrol.2026.101045_b258","series-title":"European control conference (ECC)","first-page":"1","article-title":"A Hierarchical Approach for Strategic Motion Planning in Autonomous Racing","author":"Reiter","year":"2023"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b259","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1016\/j.ifacol.2021.08.530","article-title":"Mixed-integer optimization-based planning for autonomous racing with obstacles and rewards","volume":"Vol. 54","author":"Reiter","year":"2021","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b260","series-title":"2021 IEEE international conference on robotics and automation (ICRA)","first-page":"5014","article-title":"Imitation Learning from MPC for Quadrupedal Multi-Gait Control","author":"Reske","year":"2021"},{"issue":"6","key":"10.1016\/j.arcontrol.2026.101045_b261","doi-asserted-by":"crossref","first-page":"1391","DOI":"10.1109\/TAC.2011.2176389","article-title":"Computational complexity certification for real-time MPC with input constraints based on the fast gradient method","volume":"57","author":"Richter","year":"2011","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b262","series-title":"ZipMPC: Compressed context-dependent MPC cost via imitation learning","author":"Rickenbach","year":"2025"},{"key":"10.1016\/j.arcontrol.2026.101045_b263","series-title":"2024 IEEE international conference on robotics and automation (ICRA)","first-page":"14777","article-title":"Actor-Critic Model Predictive Control","author":"Romero","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b264","series-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics","first-page":"627","article-title":"A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning","volume":"Vol. 15","author":"Ross","year":"2011"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b265","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1016\/S0377-2217(96)00385-2","article-title":"Optimization of computer simulation models with rare events","volume":"99","author":"Rubinstein","year":"1997","journal-title":"European Journal of Operational Research"},{"key":"10.1016\/j.arcontrol.2026.101045_b266","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-4321-0","article-title":"The Cross-Entropy Method","author":"Rubinstein","year":"2004"},{"key":"10.1016\/j.arcontrol.2026.101045_b267","series-title":"International conference on robotics and automation (ICRA)","first-page":"10549","article-title":"Learning to Optimize in Model Predictive Control","author":"Sacks","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b268","series-title":"Learning for CasADi: Data-driven models in numerical optimization","author":"Salzmann","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b269","doi-asserted-by":"crossref","DOI":"10.1016\/j.ifacol.2022.07.600","article-title":"Bridging the gap between QP-based and MPC-based Reinforcement Learning","author":"Sawant","year":"2022","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b270","series-title":"62nd IEEE conference on decision and control (CDC)","first-page":"4046","article-title":"Model-Free Data-Driven Predictive Control Using Reinforcement Learning","author":"Sawant","year":"2023"},{"issue":"7839","key":"10.1016\/j.arcontrol.2026.101045_b271","doi-asserted-by":"crossref","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","article-title":"Mastering atari, go, chess and shogi by planning with a learned model","volume":"588","author":"Schrittwieser","year":"2020","journal-title":"Nature"},{"key":"10.1016\/j.arcontrol.2026.101045_b272","series-title":"Trust region policy optimization","author":"Schulman","year":"2015"},{"key":"10.1016\/j.arcontrol.2026.101045_b273","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M. I., & Abbeel, P. (2016). High-Dimensional Continuous Control Using Generalized Advantage Estimation.. In 4th international conference on learning representations, ICLR."},{"key":"10.1016\/j.arcontrol.2026.101045_b274","series-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"10.1016\/j.arcontrol.2026.101045_b275","unstructured":"Schulz, F., Hoffmann, J., Zhang, Y., & Boedecker, J. (2024). Learning When to Trust the Expert for Guided Exploration in RL. In ICML 2024 workshop: foundations of reinforcement learning and control \u2013 connections and perspectives."},{"key":"10.1016\/j.arcontrol.2026.101045_b276","first-page":"1","article-title":"Stability Verification of Neural Network Controllers Using Mixed-Integer Programming","author":"Schwan","year":"2023","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b277","doi-asserted-by":"crossref","first-page":"1327","DOI":"10.1007\/s00170-021-07682-3","article-title":"Review on model predictive control: an engineering perspective","volume":"117","author":"Schwenzer","year":"2021","journal-title":"International Journal of Advanced Manufacturing Technology"},{"key":"10.1016\/j.arcontrol.2026.101045_b278","series-title":"SciPy v1.15.0 Manual","author":"SciPy v1.15.0 Manual","year":"2026"},{"key":"10.1016\/j.arcontrol.2026.101045_b279","doi-asserted-by":"crossref","first-page":"1136","DOI":"10.1109\/9.704989","article-title":"Min-max feedback model predictive control for constrained linear systems","volume":"43","author":"Scokaert","year":"1998","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b280","doi-asserted-by":"crossref","first-page":"648","DOI":"10.1109\/9.751369","article-title":"Suboptimal Model Predictive Control (Feasibility Implies Stability)","volume":"44","author":"Scokaert","year":"1999","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b281","doi-asserted-by":"crossref","first-page":"463","DOI":"10.1016\/S0005-1098(96)00213-0","article-title":"Discrete-time Stability with Perturbations: Application to Model Predictive Control","volume":"33","author":"Scokaert","year":"1997","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b282","series-title":"2023 62nd IEEE conference on decision and control (CDC)","first-page":"610","article-title":"Combining Q-learning and Deterministic Policy Gradient for Learning-Based MPC","author":"Seel","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b283","doi-asserted-by":"crossref","first-page":"366","DOI":"10.1109\/OJCSYS.2022.3221063","article-title":"Convex Neural Network-Based Cost Modifications for Learning Model Predictive Control","volume":"1","author":"Seel","year":"2022","journal-title":"IEEE Open Journal of Control Systems"},{"key":"10.1016\/j.arcontrol.2026.101045_b284","series-title":"Lectures on stochastic programming: Modelling and theory","author":"Shapiro","year":"2009"},{"key":"10.1016\/j.arcontrol.2026.101045_b285","series-title":"IEEE intelligent vehicles symposium (IV)","first-page":"1","article-title":"Reinforcement Learning and Distributed Model Predictive Control for Conflict Resolution in Highly Constrained Spaces","author":"Shen","year":"2023"},{"issue":"16","key":"10.1016\/j.arcontrol.2026.101045_b286","doi-asserted-by":"crossref","first-page":"7124","DOI":"10.3390\/s23167124","article-title":"Model-Based Predictive Control and Reinforcement Learning for Planning Vehicle-Parking Trajectories for Vertical Parking Spaces","volume":"23","author":"Shi","year":"2023","journal-title":"Sensors"},{"issue":"12","key":"10.1016\/j.arcontrol.2026.101045_b287","doi-asserted-by":"crossref","first-page":"2043","DOI":"10.1109\/TAC.2005.860248","article-title":"An efficient sequential linear quadratic algorithm for solving unconstrained nonlinear optimal control problems","volume":"50","author":"Sideris","year":"2005","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b288","series-title":"Proceedings of the 5th conference on robot learning","first-page":"1622","article-title":"Learning off-policy with online planning","volume":"Vol. 164","author":"Sikchi","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b289","series-title":"Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm","author":"Silver","year":"2017"},{"key":"10.1016\/j.arcontrol.2026.101045_b290","series-title":"Proceedings of the 31th international conference on machine learning, ICML 2014, Beijing, China, 21-26 June 2014","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume":"Vol. 32","author":"Silver","year":"2014"},{"key":"10.1016\/j.arcontrol.2026.101045_b291","series-title":"62nd IEEE conference on decision and control (CDC)","first-page":"4524","article-title":"An Efficient Method for the Joint Estimation of System Parameters and Noise Covariances for Linear Time-Variant Systems","author":"Simpson","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b292","doi-asserted-by":"crossref","DOI":"10.1007\/s10462-021-09997-9","article-title":"Reinforcement learning in robotic applications: a comprehensive survey","volume":"55","author":"Singh","year":"2022","journal-title":"Artificial Intelligence Review"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b293","first-page":"4","article-title":"Demonstrating a walk in the park: Learning to walk in 20 minutes with model-free reinforcement learning","volume":"2","author":"Smith","year":"2023","journal-title":"Robotics: Science and Systems (RSS) Demo"},{"key":"10.1016\/j.arcontrol.2026.101045_b294","article-title":"Flightmare: A Flexible Quadrotor Simulator","author":"Song","year":"2020","journal-title":"Conference on Robot Learning (CoRL)"},{"issue":"82","key":"10.1016\/j.arcontrol.2026.101045_b295","doi-asserted-by":"crossref","first-page":"eadg1462","DOI":"10.1126\/scirobotics.adg1462","article-title":"Reaching the limit in autonomous racing: Optimal control versus reinforcement learning","volume":"8","author":"Song","year":"2023","journal-title":"Science Robotics"},{"key":"10.1016\/j.arcontrol.2026.101045_b296","series-title":"2020 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"3769","article-title":"Rapidly adaptable legged robots via evolutionary meta-learning","author":"Song","year":"2020"},{"key":"10.1016\/j.arcontrol.2026.101045_b297","series-title":"Proceedings of the 39th international conference on machine learning","first-page":"20423","article-title":"Saute RL: Almost Surely Safe Reinforcement Learning Using State Augmentation","author":"Sootla","year":"2022"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b298","doi-asserted-by":"crossref","first-page":"347","DOI":"10.1007\/s001860200227","article-title":"Tree-Sparse Convex Programs","volume":"56","author":"Steinbach","year":"2002","journal-title":"Mathematical Methods of Operations Research"},{"key":"10.1016\/j.arcontrol.2026.101045_b299","series-title":"56th IEEE conference on decision and control (CDC)","first-page":"1939","article-title":"A simple and efficient algorithm for nonlinear model predictive control","author":"Stella","year":"2017"},{"key":"10.1016\/j.arcontrol.2026.101045_b300","first-page":"2","article-title":"Lectures on parametric optimization: An introduction","author":"Still","year":"2018","journal-title":"Optimization Online"},{"key":"10.1016\/j.arcontrol.2026.101045_b301","series-title":"A fast integrated planning and control framework for autonomous driving via imitation learning","author":"Sun","year":"2018"},{"issue":"4","key":"10.1016\/j.arcontrol.2026.101045_b302","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1145\/122344.122377","article-title":"Dyna, an integrated architecture for learning, planning, and reacting","volume":"2","author":"Sutton","year":"1991","journal-title":"SIGART Bull."},{"key":"10.1016\/j.arcontrol.2026.101045_b303","article-title":"Reinforcement learning: an introduction","author":"Sutton","year":"2018"},{"issue":"1\u20132","key":"10.1016\/j.arcontrol.2026.101045_b304","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning","volume":"112","author":"Sutton","year":"1999","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.arcontrol.2026.101045_b305","series-title":"International conference on machine learning","first-page":"6096","article-title":"Making deep q-learning methods robust to time discretization","author":"Tallec","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b306","doi-asserted-by":"crossref","DOI":"10.1109\/LRA.2024.3422836","article-title":"DiffTune-MPC: Closed-Loop Learning for Model Predictive Control","author":"Tao","year":"2024","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b307","doi-asserted-by":"crossref","unstructured":"Tassa, Y., Mansard, N., & Todorov, E. (2014). Control-Limited Differential Dynamic Programming. In IEEE international conference on robotics and automation.","DOI":"10.1109\/ICRA.2014.6907001"},{"issue":"4","key":"10.1016\/j.arcontrol.2026.101045_b308","doi-asserted-by":"crossref","first-page":"7635","DOI":"10.1109\/LRA.2021.3097073","article-title":"A Predictive Safety Filter for Learning-Based Racing Control","volume":"6","author":"Tearle","year":"2021","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.arcontrol.2026.101045_b309","series-title":"MATLAB version: 9.13.0 (R2022b)","author":"The MathWorks Inc","year":"2022"},{"issue":"5","key":"10.1016\/j.arcontrol.2026.101045_b310","doi-asserted-by":"crossref","first-page":"3352","DOI":"10.3390\/e17053352","article-title":"Nonlinear Stochastic Control and Information Theoretic Dualities: Connections, Interdependencies and Thermodynamic Interpretations","volume":"17","author":"Theodorou","year":"2015","journal-title":"Entropy"},{"key":"10.1016\/j.arcontrol.2026.101045_b311","series-title":"IEEE 51st IEEE conference on decision and control (CDC)","first-page":"1466","article-title":"Relative entropy and free energy dualities: Connections to Path Integral and KL control","author":"Theodorou","year":"2012"},{"key":"10.1016\/j.arcontrol.2026.101045_b312","series-title":"Revisiting the Gumbel-Softmax in MADDPG","author":"Tilbury","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b313","series-title":"2012 IEEE\/RSJ international conference on intelligent robots and systems","first-page":"5026","article-title":"MuJoCo: A physics engine for model-based control","author":"Todorov","year":"2012"},{"key":"10.1016\/j.arcontrol.2026.101045_b314","doi-asserted-by":"crossref","unstructured":"Todorov, E., & Li, W. (2005). A generalized iterative LQG method for locally-optimal feedback control of constrained nonlinear stochastic systems. In Proceedings of the American control conference (ACC).","DOI":"10.1109\/ACC.2005.1469949"},{"key":"10.1016\/j.arcontrol.2026.101045_b315","series-title":"2019 IEEE intelligent transportation systems conference (ITSC)","first-page":"3263","article-title":"Learning When to Drive in Intersections by Combining Reinforcement Learning and Model Predictive Control","author":"Tram","year":"2019"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b316","first-page":"430","article-title":"Distributionally Robust Control of Constrained Stochastic Systems","volume":"61","author":"Van Parys","year":"2016","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b317","series-title":"2023 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"10036","article-title":"Fatrop: A fast constrained optimal control problem solver for robot trajectory optimization and control","author":"Vanroye","year":"2023"},{"key":"10.1016\/j.arcontrol.2026.101045_b318","doi-asserted-by":"crossref","first-page":"261","DOI":"10.1016\/j.jprocont.2020.06.012","article-title":"Accelerating nonlinear model predictive control through machine learning","volume":"92","author":"Vaupel","year":"2020","journal-title":"Journal of Process Control"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b319","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1007\/s12532-021-00208-8","article-title":"Acados\u2014a modular open-source framework for fast embedded optimal control","volume":"14","author":"Verschueren","year":"2022","journal-title":"Mathematical Programming Computation"},{"key":"10.1016\/j.arcontrol.2026.101045_b320","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2024.111543","article-title":"Configuration-Constrained Tube MPC","volume":"163","author":"Villanueva","year":"2024","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b321","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1016\/j.automatica.2016.11.022","article-title":"Robust MPC via min-max differential inequalities","volume":"77","author":"Villanueva","year":"2017","journal-title":"Automatica"},{"key":"10.1016\/j.arcontrol.2026.101045_b322","series-title":"MPPI-Generic: A CUDA Library for Stochastic Optimization","author":"Vlahov","year":"2024"},{"issue":"5","key":"10.1016\/j.arcontrol.2026.101045_b323","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1109\/MCS.2023.3291885","article-title":"Data-Driven Safety Filters: Hamilton-Jacobi Reachability, Control Barrier Functions, and Predictive Methods for Uncertain Systems","volume":"43","author":"Wabersich","year":"2023","journal-title":"IEEE Control Systems Magazine"},{"key":"10.1016\/j.arcontrol.2026.101045_b324","series-title":"2018 IEEE conference on decision and control (CDC)","first-page":"7130","article-title":"Linear Model Predictive Safety Certification for Learning-Based Control","author":"Wabersich","year":"2018"},{"key":"10.1016\/j.arcontrol.2026.101045_b325","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2021.109597","article-title":"A predictive safety filter for learning-based control of constrained nonlinear dynamical systems","volume":"129","author":"Wabersich","year":"2021","journal-title":"Automatica"},{"issue":"1","key":"10.1016\/j.arcontrol.2026.101045_b326","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1007\/s10107-004-0559-y","article-title":"On the implementation of an interior-point filter line-search algorithm for large-scale nonlinear programming","volume":"106","author":"W\u00e4chter","year":"2006","journal-title":"Mathematical Programming"},{"key":"10.1016\/j.arcontrol.2026.101045_b327","series-title":"Exploring Model-based Planning with Policy Networks","author":"Wang","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b328","article-title":"Bregman Alternating Direction Method of Multipliers","volume":"Vol. 27","author":"Wang","year":"2014"},{"key":"10.1016\/j.arcontrol.2026.101045_b329","series-title":"Benchmarking Model-Based Reinforcement Learning","author":"Wang","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b330","doi-asserted-by":"crossref","unstructured":"Wang, C., Gao, D., Xu, K., Geng, J., Hu, Y., Qiu, Y., Li, B., Yang, F., Moon, B., Pandey, A., Aryan, Xu, J., Wu, T., He, H., Huang, D., Ren, Z., Zhao, S., Fu, T., Reddy, P., .... Scherer, S. (2023). PyPose: A Library for Robot Learning with Physics-based Optimization. In IEEE\/CVF conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR52729.2023.02109"},{"key":"10.1016\/j.arcontrol.2026.101045_b331","doi-asserted-by":"crossref","DOI":"10.1016\/j.applthermaleng.2023.120430","article-title":"Comparison of reinforcement learning and model predictive control for building energy system optimization","volume":"228","author":"Wang","year":"2023","journal-title":"Applied Thermal Engineering"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b332","first-page":"279","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/j.arcontrol.2026.101045_b333","series-title":"2024 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"5452","article-title":"Collision-Free Robot Navigation in Crowded Environments using Learning based Convex Model Predictive Control","author":"Wen","year":"2024"},{"issue":"3","key":"10.1016\/j.arcontrol.2026.101045_b334","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1023\/A:1022672621406","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"Williams","year":"1992","journal-title":"Machine Learning"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b335","doi-asserted-by":"crossref","first-page":"344","DOI":"10.2514\/1.G001921","article-title":"Model Predictive Path Integral Control: From Theory to Parallel Computation","volume":"40","author":"Williams","year":"2017","journal-title":"Journal of Guidance, Control, and Dynamics"},{"key":"10.1016\/j.arcontrol.2026.101045_b336","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1038\/s41586-021-04357-7","article-title":"Outracing champion Gran Turismo drivers with deep reinforcement learning","volume":"602","author":"Wurman","year":"2022","journal-title":"Nature"},{"key":"10.1016\/j.arcontrol.2026.101045_b337","series-title":"Proceedings of the conference on robot learning","first-page":"1","article-title":"Data Efficient Reinforcement Learning for Legged Robots","author":"Yang","year":"2020"},{"key":"10.1016\/j.arcontrol.2026.101045_b338","series-title":"2022 American control conference (ACC)","first-page":"3482","article-title":"Sampling Complexity of Path Integral Methods for Trajectory Optimization","author":"Yoon","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b339","article-title":"Zero-Order Robust Nonlinear Model Predictive Control with Ellipsoidal Uncertainty Sets","author":"Zanelli","year":"2021","journal-title":"Proceedings of the IFAC Conference on Nonlinear Model Predictive Control (NMPC)"},{"issue":"8","key":"10.1016\/j.arcontrol.2026.101045_b340","doi-asserted-by":"crossref","first-page":"3638","DOI":"10.1109\/TAC.2020.3024161","article-title":"Safe Reinforcement Learning Using Robust MPC","volume":"66","author":"Zanon","year":"2021","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.arcontrol.2026.101045_b341","series-title":"18th European control conference (ECC)","first-page":"2258","article-title":"Practical reinforcement learning of stabilizing economic MPC","author":"Zanon","year":"2019"},{"key":"10.1016\/j.arcontrol.2026.101045_b342","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2022.110399","article-title":"Stability-constrained Markov Decision Processes using MPC","volume":"143","author":"Zanon","year":"2022","journal-title":"Automatica"},{"issue":"2","key":"10.1016\/j.arcontrol.2026.101045_b343","doi-asserted-by":"crossref","first-page":"5213","DOI":"10.1016\/j.ifacol.2020.12.1195","article-title":"Reinforcement Learning Based on Real-Time Iteration NMPC","volume":"53","author":"Zanon","year":"2020","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.arcontrol.2026.101045_b344","series-title":"IEEE intelligent vehicles symposium, proceedings","first-page":"1401","article-title":"A Safe Reinforcement Learning driven Weights-varying Model Predictive Control for Autonomous Vehicle Motion Control: 35th IEEE Intelligent Vehicles Symposium, IV 2024","author":"Zarrouki","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b345","series-title":"2024 IEEE\/RSJ international conference on intelligent robots and systems (IROS)","first-page":"12726","article-title":"Adaptive Stochastic Nonlinear Model Predictive Control with Look-ahead Deep Reinforcement Learning for Autonomous Vehicle Motion Control","author":"Zarrouki","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b346","series-title":"4th international conference on control and robotics (ICCR)","first-page":"47","article-title":"Learning-Based Model Predictive Control for Quadruped Locomotion on Slippery Ground","author":"Zhang","year":"2022"},{"key":"10.1016\/j.arcontrol.2026.101045_b347","series-title":"International conference on artificial intelligence and statistics","first-page":"4015","article-title":"On the importance of hyperparameter optimization for model-based reinforcement learning","author":"Zhang","year":"2021"},{"key":"10.1016\/j.arcontrol.2026.101045_b348","doi-asserted-by":"crossref","first-page":"27853","DOI":"10.1109\/ACCESS.2022.3156581","article-title":"Building Energy Management With Reinforcement Learning and Model Predictive Control: A Survey","volume":"10","author":"Zhang","year":"2022","journal-title":"IEEE Access"},{"key":"10.1016\/j.arcontrol.2026.101045_b349","series-title":"Constrained Reinforcement Learning with Smoothed Log Barrier Function","author":"Zhang","year":"2024"},{"key":"10.1016\/j.arcontrol.2026.101045_b350","series-title":"IEEE symposium on adaptive dynamic programming and reinforcement learning (ADPRL)","first-page":"100","article-title":"Value function approximation and model predictive control","author":"Zhong","year":"2013"},{"key":"10.1016\/j.arcontrol.2026.101045_b351","series-title":"Proceedings of the 23rd AAAI conference on artificial intelligence","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","author":"Ziebart","year":"2008"}],"container-title":["Annual Reviews in Control"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1367578826000015?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1367578826000015?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:33:22Z","timestamp":1774020802000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1367578826000015"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":351,"alternative-id":["S1367578826000015"],"URL":"https:\/\/doi.org\/10.1016\/j.arcontrol.2026.101045","relation":{},"ISSN":["1367-5788"],"issn-type":[{"value":"1367-5788","type":"print"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Synthesis of model predictive control and reinforcement learning: Survey and classification","name":"articletitle","label":"Article Title"},{"value":"Annual Reviews in Control","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.arcontrol.2026.101045","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"101045"}}