{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T05:17:39Z","timestamp":1775193459318,"version":"3.50.1"},"reference-count":39,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T00:00:00Z","timestamp":1771372800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001659","name":"German Research Foundation","doi-asserted-by":"publisher","award":["SPP 2331"],"award-info":[{"award-number":["SPP 2331"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computers &amp; Chemical Engineering"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.compchemeng.2026.109605","type":"journal-article","created":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T07:40:44Z","timestamp":1771400444000},"page":"109605","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Computationally efficient Gauss\u2013Newton reinforcement learning for model predictive control"],"prefix":"10.1016","volume":"209","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1500-7064","authenticated-orcid":false,"given":"Dean","family":"Brandner","sequence":"first","affiliation":[]},{"given":"Sebastien","family":"Gros","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3347-5593","authenticated-orcid":false,"given":"Sergio","family":"Lucia","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.compchemeng.2026.109605_b1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s12532-018-0139-4","article-title":"CasADi: A software framework for nonlinear optimization and optimal control","volume":"11","author":"Andersson","year":"2019","journal-title":"Math. Program. Comput."},{"key":"10.1016\/j.compchemeng.2026.109605_b2","doi-asserted-by":"crossref","first-page":"2831","DOI":"10.1021\/acs.iecr.4c03584","article-title":"Gradient-based framework for bilevel optimization of black-box functions: synergizing model-free reinforcement learning and implicit function differentiation","volume":"64","author":"Banker","year":"2025","journal-title":"Ind. Eng. Chem. Res."},{"key":"10.1016\/j.compchemeng.2026.109605_b3","doi-asserted-by":"crossref","first-page":"545","DOI":"10.1093\/imanum\/dry009","article-title":"Exact and inexact subsampled Newton methods for optimization","volume":"39","author":"Bollapragada","year":"2019","journal-title":"IMA J. Numer. Anal."},{"key":"10.1016\/j.compchemeng.2026.109605_b4","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1137\/16M1080173","article-title":"Optimization methods for large-scale machine learning","volume":"60","author":"Bottou","year":"2018","journal-title":"SIAM Rev."},{"key":"10.1016\/j.compchemeng.2026.109605_b5","series-title":"2024 European Control Conference (ECC)","first-page":"2299","article-title":"Reinforced model predictive control via trust-region quasi-Newton policy optimization","author":"Brandner","year":"2024"},{"key":"10.1016\/j.compchemeng.2026.109605_b6","series-title":"Taschenbuch der Mathematik. Edition Harri Deutsch","author":"Bron\u0161tejn","year":"2016"},{"key":"10.1016\/j.compchemeng.2026.109605_b7","doi-asserted-by":"crossref","first-page":"1541","DOI":"10.1007\/s13042-019-01055-9","article-title":"Stochastic trust region inexact Newton method for large-scale machine learning","volume":"11","author":"Chauhan","year":"2020","journal-title":"Int. J. Mach. Learn. Cybern."},{"key":"10.1016\/j.compchemeng.2026.109605_b8","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.compchemeng.2026.109605_b9","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1007\/BF02055196","article-title":"Sensitivity and stability analysis for nonlinear programming","volume":"27","author":"Fiacco","year":"1990","journal-title":"Ann. Oper. Res."},{"key":"10.1016\/j.compchemeng.2026.109605_b10","doi-asserted-by":"crossref","DOI":"10.1016\/j.conengprac.2023.105676","article-title":"Do-mpc: towards FAIR nonlinear and robust model predictive control","volume":"140","author":"Fiedler","year":"2023","journal-title":"Control Eng. Pract."},{"key":"10.1016\/j.compchemeng.2026.109605_b11","series-title":"Proceedings of the 35th International Conference on Machine Learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.compchemeng.2026.109605_b12","first-page":"1","article-title":"Approximate Newton methods for policy search in Markov decision processes","volume":"17","author":"Furmston","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.compchemeng.2026.109605_b13","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1016\/j.ifacol.2020.12.126","article-title":"Modern machine learning tools for monitoring and control of industrial processes: a survey","volume":"53","author":"Gopaluni","year":"2020","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.compchemeng.2026.109605_b14","doi-asserted-by":"crossref","first-page":"636","DOI":"10.1109\/TAC.2019.2913768","article-title":"Data-driven economic NMPC using reinforcement learning","volume":"65","author":"Gros","year":"2020","journal-title":"IEEE Trans. Autom. Control"},{"key":"10.1016\/j.compchemeng.2026.109605_b15","series-title":"2021 American Control Conference (ACC)","first-page":"1947","article-title":"Reinforcement learning based on MPC and the stochastic policy gradient method","author":"Gros","year":"2021"},{"key":"10.1016\/j.compchemeng.2026.109605_b16","series-title":"Proceedings of the 35th International Conference on Machine Learning","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"10.1016\/j.compchemeng.2026.109605_b17","series-title":"Proceedings of the Conference on Robot Learning","first-page":"945","article-title":"Quasi-Newton trust region policy optimization","author":"Jha","year":"2020"},{"key":"10.1016\/j.compchemeng.2026.109605_b18","series-title":"Advances in Neural Information Processing Systems","article-title":"A natural policy gradient","author":"Kakade","year":"2001"},{"key":"10.1016\/j.compchemeng.2026.109605_b19","unstructured":"Kingma, D.P., Ba, J., 2015. Adam: A Method for Stochastic Optimization. In: Bengio, Y., LeCun, Y. (Eds.), 3rd International Conference on Learning Representations (ICLR). San Diego, CA, USA."},{"key":"10.1016\/j.compchemeng.2026.109605_b20","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1016\/S0098-1354(97)00261-5","article-title":"Gain-scheduling trajectory control of a continuous stirred tank reactor","volume":"22","author":"Klatt","year":"1998","journal-title":"Comput. Chem. Eng."},{"key":"10.1016\/j.compchemeng.2026.109605_b21","series-title":"Exact Gauss-Newton optimization for training deep neural networks","author":"Korbit","year":"2024"},{"key":"10.1016\/j.compchemeng.2026.109605_b22","series-title":"Quasi-Newton compatible actor-critic for deterministic policies","author":"Kordabad","year":"2025"},{"key":"10.1016\/j.compchemeng.2026.109605_b23","series-title":"2022 American Control Conference (ACC)","first-page":"2124","article-title":"Quasi-Newton iteration in deterministic policy gradient","author":"Kordabad","year":"2022"},{"key":"10.1016\/j.compchemeng.2026.109605_b24","doi-asserted-by":"crossref","first-page":"1306","DOI":"10.1016\/j.conengprac.2006.11.013","article-title":"Iterative learning control applied to batch processes: an overview","volume":"15","author":"Lee","year":"2007","journal-title":"Control Eng. Pract."},{"key":"10.1016\/j.compchemeng.2026.109605_b25","series-title":"The Twelfth International Conference on Learning Representations, {ICLR} 2024, Vienna, Austria, May 7-11, 2024","article-title":"Sophia: a scalable stochastic second-order optimizer for language model pre-training","author":"Liu","year":"2024"},{"key":"10.1016\/j.compchemeng.2026.109605_b26","doi-asserted-by":"crossref","DOI":"10.1016\/j.compchemeng.2024.108824","article-title":"End-to-end reinforcement learning of koopman models for economic nonlinear model predictive control","volume":"190","author":"Mayfrank","year":"2024","journal-title":"Comput. Chem. Eng."},{"key":"10.1016\/j.compchemeng.2026.109605_b27","series-title":"Numerical Optimization","author":"Nocedal","year":"2006"},{"key":"10.1016\/j.compchemeng.2026.109605_b28","first-page":"1","article-title":"Stable-baselines3: reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.compchemeng.2026.109605_b29","series-title":"Model Predictive Control: Theory, Computation, and Design","author":"Rawlings","year":"2017"},{"key":"10.1016\/j.compchemeng.2026.109605_b30","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1146\/annurev-control-053018-023825","article-title":"A tour of reinforcement learning: the view from continuous control","volume":"2","author":"Recht","year":"2019","journal-title":"Annu. Rev. Control. Robot. Auton. Syst."},{"key":"10.1016\/j.compchemeng.2026.109605_b31","series-title":"Synthesis of model predictive control and reinforcement learning: survey and classification","author":"Reiter","year":"2025"},{"key":"10.1016\/j.compchemeng.2026.109605_b32","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","article-title":"A stochastic approximation method","volume":"22","author":"Robbins","year":"1951","journal-title":"Ann. Math. Stat."},{"key":"10.1016\/j.compchemeng.2026.109605_b33","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1038\/323533a0","article-title":"Learning representations by back-propagating errors","volume":"323","author":"Rumelhart","year":"1986","journal-title":"Nature"},{"key":"10.1016\/j.compchemeng.2026.109605_b34","series-title":"Proceedings of the Eleventh International Conference on Artificial Intelligence and Statistics","first-page":"436","article-title":"A stochastic quasi-newton method for online convex optimization","author":"Schraudolph","year":"2007"},{"key":"10.1016\/j.compchemeng.2026.109605_b35","series-title":"Proceedings of the 31st International Conference on Machine Learning","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"Silver","year":"2014"},{"key":"10.1016\/j.compchemeng.2026.109605_b36","article-title":"Reinforcement learning: An introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.compchemeng.2026.109605_b37","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1007\/s10107-004-0559-y","article-title":"On the implementation of an interior-point filter line-search algorithm for large-scale nonlinear programming","volume":"106","author":"W\u00e4chter","year":"2006","journal-title":"Math. Program."},{"key":"10.1016\/j.compchemeng.2026.109605_b38","first-page":"10665","article-title":"ADAHESSIAN: an adaptive second order optimizer for machine learning","volume":"35","author":"Yao","year":"2021","journal-title":"Proc. the AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.compchemeng.2026.109605_b39","doi-asserted-by":"crossref","first-page":"108","DOI":"10.1016\/j.arcontrol.2021.10.006","article-title":"Reinforcement learning for batch process control: review and perspectives","volume":"52","author":"Yoo","year":"2021","journal-title":"Annu. Rev. Control."}],"container-title":["Computers &amp; Chemical Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S009813542600058X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S009813542600058X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T03:40:07Z","timestamp":1775187607000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S009813542600058X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":39,"alternative-id":["S009813542600058X"],"URL":"https:\/\/doi.org\/10.1016\/j.compchemeng.2026.109605","relation":{},"ISSN":["0098-1354"],"issn-type":[{"value":"0098-1354","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Computationally efficient Gauss\u2013Newton reinforcement learning for model predictive control","name":"articletitle","label":"Article Title"},{"value":"Computers & Chemical Engineering","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.compchemeng.2026.109605","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"109605"}}