{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T16:02:52Z","timestamp":1770739372857,"version":"3.49.0"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"ARO","award":["W911NF2010151"],"award-info":[{"award-number":["W911NF2010151"]}]},{"name":"DoD Basic Research Office","award":["HQ00342110002"],"award-info":[{"award-number":["HQ00342110002"]}]},{"name":"DARPA AIQ program through the DARPA CMO","award":["HR00112520010"],"award-info":[{"award-number":["HR00112520010"]}]},{"DOI":"10.13039\/100006778","name":"Georgia Institute of Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006778","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tpami.2025.3634391","type":"journal-article","created":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T18:45:41Z","timestamp":1763577941000},"page":"2989-3001","source":"Crossref","is-referenced-by-count":0,"title":["Optimal Control Theoretic Neural Optimizer: From Backpropagation to Dynamic Programming"],"prefix":"10.1109","volume":"48","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8989-7568","authenticated-orcid":false,"given":"Guan-Horng","family":"Liu","sequence":"first","affiliation":[{"name":"FAIR, Meta, Menlo Park, CA, USA"}]},{"given":"Tianrong","family":"Chen","sequence":"additional","affiliation":[{"name":"Apple MLR, Cupertino, CA, USA"}]},{"given":"Evangelos A.","family":"Theodorou","sequence":"additional","affiliation":[{"name":"School of Aerospace Engineering, Georgia Institute of Technology, Atlanta, Georgia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-022-05172-4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref6","article-title":"Neural ordinary differential equations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2693418"},{"key":"ref8","first-page":"21","article-title":"A theoretical framework for back-propagation","volume-title":"Proc. 1988 Connectionist Models Summer Sch.","author":"LeCun"},{"key":"ref9","volume-title":"Stochastic Models in Operations Research: Stochastic Optimization","volume":"2","author":"Heyman","year":"2004"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s10957-012-0050-5"},{"key":"ref11","volume-title":"Dynamic Programming and Optimal Control: Volume I","volume":"4","author":"Bertsekas","year":"2012"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s40304-017-0103-z"},{"key":"ref13","first-page":"3276","article-title":"Beyond finite layer neural networks: Bridging deep architectures and numerical differential equations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lu"},{"key":"ref14","first-page":"15353","article-title":"Hamiltonian neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Greydanus"},{"key":"ref15","first-page":"6426","article-title":"A mean-field analysis of deep ResNet and beyond: Towards provable optimization via overparameterization from depth","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lu"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11668"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s40687-018-0172-y"},{"key":"ref18","article-title":"Robust deep learning as optimal control: Insights and convergence guarantees","volume":"1","author":"Seidman","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref19","first-page":"2101","article-title":"Stochastic modified equations and adaptive stochastic gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1137\/19M1247620"},{"key":"ref21","article-title":"You only propagate once: Accelerating adversarial training via maximal principle","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"issue":"1","key":"ref22","first-page":"5998","article-title":"Maximum principle based algorithms for deep learning","volume":"18","author":"Li","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1954-09848-8"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2015.XI.029"},{"key":"ref25","first-page":"1287","article-title":"One practical algorithm for both stochastic and adversarial bandits","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Seldin"},{"key":"ref26","first-page":"7979","article-title":"Pontryagin differentiable programming: An end-to-end learning and control framework","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Jin"},{"key":"ref27","article-title":"Understanding and improving transformer from a multi-particle dynamic system point of view","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lu"},{"key":"ref28","first-page":"3870","article-title":"Scalable gradients for stochastic differential equations","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Li"},{"key":"ref29","first-page":"6696","article-title":"Neural controlled differential equations for irregular time series","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kidger"},{"key":"ref30","first-page":"8780","article-title":"Diffusion models beat GANs on image synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Dhariwal"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1098\/rsta.2020.0093"},{"key":"ref32","first-page":"2985","article-title":"An optimal control approach to deep learning and applications to discrete-weight neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1201\/9780203749319"},{"issue":"Nov","key":"ref34","first-page":"3137","article-title":"A generalized path integral control approach to reinforcement learning","volume":"11","author":"Theodorou","year":"2010","journal-title":"J. Mach. Learn. Res."},{"key":"ref35","first-page":"9374","article-title":"Deep generalized schr\u00f6dinger bridge","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref36","article-title":"Likelihood training of schr\u00f6dinger bridge using forward-backward SDEs theory","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2005.1469949"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.2514\/1.G003516"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1080\/00207178808906114"},{"key":"ref41","volume-title":"Differential Dynamic Programming","author":"Jacobson","year":"1970"},{"key":"ref42","article-title":"Cross-entropy loss and low-rank features have responsibility for adversarial examples","author":"Nar","year":"2019"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00846"},{"key":"ref44","first-page":"557","article-title":"Practical gauss-newton optimisation for deep learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Botev"},{"key":"ref45","first-page":"9550","article-title":"Fast approximate natural gradient descent in a kronecker factored eigenbasis","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"George"},{"key":"ref46","article-title":"New insights and perspectives on the natural gradient method","author":"Martens","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"ref47","article-title":"Lecture 6.5-RMSProp, Coursera: Neural networks for machine learning","author":"Tieleman","year":"2012"},{"key":"ref48","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma"},{"key":"ref49","first-page":"2408","article-title":"Optimizing neural networks with kronecker-factored approximate curvature","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Martens"},{"key":"ref50","first-page":"573","article-title":"A kronecker-factored approximate fisher matrix for convolution layers","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Grosse"},{"key":"ref51","volume-title":"Cooperative Stochastic Differential Games","author":"Yeung","year":"2006"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/0-8176-4429-6_9"},{"key":"ref53","article-title":"The UEA multivariate time series classification archive","author":"Bagnall","year":"2018"},{"key":"ref54","article-title":"FFJORD: Free-form continuous dynamics for scalable reversible generative models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Grathwohl"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/0771-050X(80)90013-3"},{"key":"ref56","first-page":"5320","article-title":"Latent ODEs for irregularly-sampled time series","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Rubanova"},{"key":"ref57","article-title":"Advantages of differential dynamic programming over newton\u2019s method for discrete-time optimal control problems","author":"Liao","year":"1992"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/BF00934463"},{"key":"ref59","first-page":"3952","article-title":"Dissecting neural ODEs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Massaroli"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/34\/11372200\/11260962-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11372200\/11260962.pdf?arnumber=11260962","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T21:05:31Z","timestamp":1770671131000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11260962\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":59,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3634391","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}