{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T23:06:47Z","timestamp":1768345607419,"version":"3.49.0"},"reference-count":65,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006132","name":"Office of Science","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006132","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006207","name":"Fusion Energy Sciences","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006207","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,8]]},"DOI":"10.23919\/acc63710.2025.11108095","type":"proceedings-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:17:51Z","timestamp":1755800271000},"page":"1367-1376","source":"Crossref","is-referenced-by-count":2,"title":["Local-Global Learning of Interpretable Control Policies: The Interface between MPC and Reinforcement Learning"],"prefix":"10.23919","author":[{"given":"Thomas","family":"Banker","sequence":"first","affiliation":[{"name":"University of California,Department of Chemical and Biomolecular Engineering,Berkeley,CA,USA,94720"}]},{"given":"Nathan P.","family":"Lawrence","sequence":"additional","affiliation":[{"name":"University of California,Department of Chemical and Biomolecular Engineering,Berkeley,CA,USA,94720"}]},{"given":"Ali","family":"Mesbah","sequence":"additional","affiliation":[{"name":"University of California,Department of Chemical and Biomolecular Engineering,Berkeley,CA,USA,94720"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Dynamic Programming.","author":"Bellman","year":"1957"},{"key":"ref2","volume-title":"Reinforcement Learning: An Introduction.","author":"Sutton","year":"2018"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1002\/SERIES1345"},{"key":"ref4","volume-title":"Neuro-Dynamic Programming.","author":"Bertsekas","year":"1996"},{"key":"ref5","first-page":"329","article-title":"The theory of optimal control and the calculus of variations","volume":"309","author":"K\u00e1lm\u00e1n","year":"1963","journal-title":"Mathematical Optimization Techniques"},{"key":"ref6","volume-title":"Dynamic Programming and Optimal Control: Volume I","author":"Bertsekas","year":"1995"},{"key":"ref7","volume-title":"Lessons from AlphaZero for Optimal, Model Predictive, and Adaptive Control.","author":"Bertsekas","year":"2022"},{"key":"ref8","volume-title":"Model Predictive Control: Theory, Computation, and Design.","author":"Rawlings","year":"2017"},{"key":"ref9","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s12555-011-0300-6"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.23919\/ACC53348.2022.9867643"},{"key":"ref12","article-title":"Synthesis of model predictive control and reinforcement learning: Survey and classification","author":"Reiter","year":"2025"},{"key":"ref13","article-title":"A view on learning robust goal-conditioned value functions: Interplay between RL and MPC","author":"Lawrence","year":"2025"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1959.1104847"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2012.2214134"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.4064\/fm-3-1-133-181"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1515\/9781400874651"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/122344.122377"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"},{"key":"ref20","first-page":"13 973","article-title":"Trust the model where it trusts itself - model-based actor-critic with uncertainty-aware rollout adaption","volume-title":"Proceedings of the 41st International Conference on Machine Learning","volume":"235","author":"Frauenknecht"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2012.2218595"},{"key":"ref22","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Advances in Neural Information Processing Systems","volume":"12","author":"Sutton","year":"1999"},{"key":"ref23","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proceedings of the 31st International Conference on Machine Learning","volume":"32","author":"Silver"},{"key":"ref24","article-title":"Actor-critic algorithms","volume-title":"Advances in Neural Information Processing Systems","volume":"12","author":"Konda","year":"1999"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"issue":"1","key":"ref26","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"35","author":"Haarnoja"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/bfb0109870"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2016.2602087"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2913768"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110598"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-004-0559-y"},{"issue":"9","key":"ref32","first-page":"1240","article-title":"Dual control theory. I","volume":"21","author":"Feldbaum","year":"1960","journal-title":"Avtomatika i Telemekhanika"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/S1474-6670(17)45327-4"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2017.11.001"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3024161"},{"key":"ref36","article-title":"Plan online, learn offline: Efficient learning and exploration via model-based control","author":"Lowrey","year":"2019"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/S1474-6670(17)33814-4"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.jprocont.2008.11.009"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6614995"},{"key":"ref40","article-title":"Deep value model predictive control","author":"Farshidian","year":"2019"},{"key":"ref41","article-title":"Blending MPC & value function approximation for efficient reinforcement learning","author":"Bhardwaj","year":"2020"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1021\/acs.iecr.4c03584"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3360322.3360849"},{"key":"ref44","article-title":"Differentiable MPC for end-to-end planning and control","author":"Amos","year":"2019"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610381"},{"key":"ref46","article-title":"Temporal difference learning for model predictive control","author":"Hansen","year":"2022"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.23919\/ECC57647.2023.10178119"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(99)00214-9"},{"key":"ref49","article-title":"Certainty equivalence is efficient for linear quadratic control","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Mania","year":"2019"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.3166\/ejc.11.335-352"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/S0098-1354(97)00261-5"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.conengprac.2023.105676"},{"key":"ref53","article-title":"PC-Gym: Benchmark environments for process control problems","author":"Bloor","year":"2024"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1088\/1361-6595\/ab3c15"},{"key":"ref56","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4939-1037-3"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0121219"},{"key":"ref59","article-title":"Hindsight Experience Replay","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Andrychowicz","year":"2017"},{"key":"ref60","first-page":"35 603","article-title":"Contrastive learning as goal-conditioned reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Eysenbach","year":"2022"},{"key":"ref61","first-page":"541","article-title":"Learning for CasADi: Data-driven models in numerical optimization","volume-title":"Proceedings of the 6th Annual Learning for Dynamics & Control Conference","volume":"242","author":"Salzmann"},{"key":"ref62","article-title":"High-Dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2018"},{"key":"ref63","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Kumar","year":"2019"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/s12532-018-0139-4"},{"key":"ref65","article-title":"When to trust your model: Model-based policy optimization","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Janner","year":"2019"}],"event":{"name":"2025 American Control Conference (ACC)","location":"Denver, CO, USA","start":{"date-parts":[[2025,7,8]]},"end":{"date-parts":[[2025,7,10]]}},"container-title":["2025 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11107441\/11107442\/11108095.pdf?arnumber=11108095","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T23:57:49Z","timestamp":1755907069000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11108095\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,8]]},"references-count":65,"URL":"https:\/\/doi.org\/10.23919\/acc63710.2025.11108095","relation":{},"subject":[],"published":{"date-parts":[[2025,7,8]]}}}