{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T15:03:01Z","timestamp":1781622181944,"version":"3.54.5"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Deutsche Forschungsgemeinschaft (DFG) through Research Unit Forschungsgruppe (FOR) 2401","award":["424107692"],"award-info":[{"award-number":["424107692"]}]},{"name":"European Union (EU) through Embedded Learning and Optimization for the Next Generation of Smart Industrial Control Systems","award":["953348"],"award-info":[{"award-number":["953348"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Contr. Syst. Technol."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1109\/tcst.2025.3620521","type":"journal-article","created":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T18:07:32Z","timestamp":1761588452000},"page":"395-410","source":"Crossref","is-referenced-by-count":5,"title":["AC4MPC: Actor-Critic Reinforcement Learning for Guiding Model Predictive Control"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7635-2132","authenticated-orcid":false,"given":"Rudolf","family":"Reiter","sequence":"first","affiliation":[{"name":"Department of Informatics, Robotics and Perception Group, University of Zurich, Zurich, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1420-4560","authenticated-orcid":false,"given":"Andrea","family":"Ghezzi","sequence":"additional","affiliation":[{"name":"Department of Microsystems Engineering (IMTEK), University of Freiburg, Freiburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0485-8386","authenticated-orcid":false,"given":"Katrin","family":"Baumg\u00e4rtner","sequence":"additional","affiliation":[{"name":"Department of Microsystems Engineering (IMTEK), University of Freiburg, Freiburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jasper","family":"Hoffmann","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Freiburg, Freiburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5687-6875","authenticated-orcid":false,"given":"Robert D.","family":"McAllister","sequence":"additional","affiliation":[{"name":"Delft Center for Systems and Control, Delft University of Technology, Delft, The Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6556-8252","authenticated-orcid":false,"given":"Moritz","family":"Diehl","sequence":"additional","affiliation":[{"name":"Department of Microsystems Engineering and the Department of Mathematics, University of Freiburg, Freiburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Model Predictive Control: Theory, Computation, and Design","author":"Rawlings","year":"2017"},{"key":"ref2","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s00170-021-07682-3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6614995"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2011.10.011"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/s1474-6670(17)61205-9"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1126\/science.153.3731.34"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1995.478953"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2017.08.747"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012902400713"},{"key":"ref11","first-page":"8299","article-title":"Differentiable MPC for end-to-end planning and control","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Amos"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2913768"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9482765"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/tro.2025.3644945\/mm1"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.23919\/ECC57647.2023.10178143"},{"key":"ref16","article-title":"CACTO-SL: Using sobolev learning to improve continuous actor-critic with trajectory optimization","volume-title":"Proc. Learn. Dyn. Control Conf.","author":"Alboni"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3266985"},{"key":"ref18","first-page":"1","article-title":"Guided policy search","volume-title":"Proc. 30th Int. Conf. Mach. Learn.","author":"Levine"},{"key":"ref19","article-title":"Plan online, learn offline: Efficient learning and exploration via model-based control","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Lowrey"},{"key":"ref20","first-page":"35989","article-title":"Warm-start actor-critic: From approximation error to sub-optimality gap","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2018.09.014"},{"key":"ref22","first-page":"220","article-title":"End-to-end learning to warm-start for real-time quadratic optimization","volume-title":"Proc. 5th Annu. Learn. Dyn. Control Conf.","author":"Sambharya"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8795808"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3007688"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463154"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3348134"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2021.109947"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IV55152.2023.10186560"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.7411"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejcon.2020.03.001"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CDC51059.2022.9992741"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2023.10.1320"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794352"},{"key":"ref34","first-page":"211","article-title":"Practical reinforcement learning for MPC: Learning from sparse objectives in under an hour on a real robot","volume-title":"Proc. 2nd Conf. Learn. Dyn. Control","volume":"120","author":"Karnchanachari"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/9.701133"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/S1474-6670(17)34721-3"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2004.01.011"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2016.06.027"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.3166\/ejc.11.310-334"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1080\/10556780701394169"},{"key":"ref41","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref43","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref44","first-page":"7825","article-title":"Mirror learning: A unifying framework of policy optimisation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kuba"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/b98874"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1002\/oca.2492"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1561\/2600000014"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2017.04.058"},{"key":"ref49","volume-title":"Lessons From AlphaZero for Optimal, Model Predictive, and Adaptive Control","author":"Bertsekas","year":"2022"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2008.927799"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2011.2179349"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2016.03.024"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1023\/B:COAP.0000018880.63497.eb"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/s12532-021-00208-8"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-004-0559-y"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2020.12.073"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3246839"},{"issue":"268","key":"ref59","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref60","volume-title":"Roborace","year":"2019"},{"key":"ref61","first-page":"541","article-title":"Learning for CasADi: Data-driven models in numerical optimization","volume-title":"Proc. 6th Annu. Learn. Dyn. Control Conf.","author":"Salzmann"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/9.486649"}],"container-title":["IEEE Transactions on Control Systems Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/87\/11339250\/11218987.pdf?arnumber=11218987","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T22:04:44Z","timestamp":1768255484000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11218987\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":62,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tcst.2025.3620521","relation":{},"ISSN":["1063-6536","1558-0865","2374-0159"],"issn-type":[{"value":"1063-6536","type":"print"},{"value":"1558-0865","type":"electronic"},{"value":"2374-0159","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]}}}