{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T14:58:47Z","timestamp":1777733927019,"version":"3.51.4"},"reference-count":85,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022ZD0160300"],"award-info":[{"award-number":["2022ZD0160300"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF China","award":["62276004"],"award-info":[{"award-number":["62276004"]}]},{"name":"PCL, China","award":["PCL2021A12"],"award-info":[{"award-number":["PCL2021A12"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1109\/tpami.2024.3380007","type":"journal-article","created":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T19:15:05Z","timestamp":1711394105000},"page":"6231-6246","source":"Crossref","is-referenced-by-count":6,"title":["Designing Universally-Approximating Deep Neural Networks: A First-Order Optimization Approach"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6137-5492","authenticated-orcid":false,"given":"Zhoutong","family":"Wu","sequence":"first","affiliation":[{"name":"Center for Data Science, Academy for Advanced Interdisciplinary Studies, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6191-7726","authenticated-orcid":false,"given":"Mingqing","family":"Xiao","sequence":"additional","affiliation":[{"name":"National Key Lab of General AI, School of Intelligence Science and Technology, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5076-7897","authenticated-orcid":false,"given":"Cong","family":"Fang","sequence":"additional","affiliation":[{"name":"National Key Lab of General AI, School of Intelligence Science and Technology, Institute for Artificial Intelligence, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1493-7569","authenticated-orcid":false,"given":"Zhouchen","family":"Lin","sequence":"additional","affiliation":[{"name":"National Key Lab of General AI, School of Intelligence Science and Technology, Institute for Artificial Intelligence, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1097","article-title":"ImageNet classification with deep convolutional\n                        neural networks","volume-title":"Proc. Adv. Neural Inf.\n                        Process. Syst.","author":"Krizhevsky"},{"key":"ref2","article-title":"Googles neural machine translation\n                        system: Bridging the gap between human and machine\n                        translation","author":"Wu","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/0893-6080(89)90020-8"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/BF02551274"},{"key":"ref6","first-page":"6232","article-title":"The expressive power of neural networks: A view from\n                        the width","volume-title":"Proc. Adv. Neural Inf.\n                        Process. Syst.","author":"Lu"},{"key":"ref7","first-page":"6172","article-title":"ResNet with one-neuron hidden layers is a universal\n                        approximator","volume-title":"Proc. Adv. Neural Inf.\n                        Process. Syst.","author":"Lin"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref13","article-title":"Designing neural network architectures using\n                        reinforcement learning","volume-title":"Proc. Int. Conf.\n                        Learn. Representations","author":"Baker"},{"key":"ref14","article-title":"DARTS: Differentiable architecture\n                        search","volume-title":"Proc. Int. Conf. Learn.\n                        Representations","author":"Liu"},{"key":"ref15","first-page":"399","article-title":"Learning fast approximations of sparse\n                        coding","volume-title":"Proc. Int. Conf. Mach.\n                        Learn.","author":"Gregor"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2883941"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.3016905"},{"key":"ref18","first-page":"614","article-title":"Optimization algorithm inspired deep neural network\n                        structure design","volume-title":"Proc. Asian Conf. Mach.\n                        Learn.","author":"Li"},{"key":"ref19","article-title":"Deep layers as stochastic\n                    solvers","volume-title":"Proc. Int. Conf. Learn.\n                        Representations","author":"Bibi"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.4171\/JEMS\/1221"},{"key":"ref21","volume-title":"An\n                        Attempt to Test the Theories of Capillary Action by Comparing the\n                        Theoretical and Measured Forms of Drops of Fluid","author":"Bashforth","year":"1883"},{"key":"ref22","first-page":"543","article-title":"A method for unconstrained convex minimization\n                        problem with the rate of convergence\n                    O(1\/k2)","volume-title":"Proc. Doklady Akademii\n                        Nauk USSR","volume":"269","author":"Nesterov"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1561\/2400000003"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"ref25","article-title":"Deep equilibrium models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Bai"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1137\/20M1358517"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref28","first-page":"10347","article-title":"Training data-efficient image transformers &\n                        distillation through attention","volume-title":"Proc.\n                        Int. Conf. Mach. Learn.","author":"Touvron"},{"key":"ref29","article-title":"Deep, skinny neural networks are not universal\n                        approximators","volume-title":"Proc. Int. Conf. Learn.\n                        Representations","author":"Johnson"},{"key":"ref30","article-title":"Approximating continuous functions by ReLU nets of\n                        minimal width","author":"Hanin","year":"2017"},{"key":"ref31","article-title":"Minimum width for universal\n                        approximation","volume-title":"Proc. Int. Conf. Learn.\n                        Representations","author":"Park"},{"key":"ref32","first-page":"2306","article-title":"Universal approximation with deep narrow\n                        networks","volume-title":"Proc. Conf. Learn.\n                        Theory","author":"Kidger"},{"key":"ref33","article-title":"Deep unfolding: Model-based inspiration of novel deep\n                        architectures","author":"Hershey","year":"2014"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2018.2799231"},{"key":"ref35","article-title":"Deep networks from the principle of rate\n                        reduction","author":"Chan","year":"2020"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-021-3442-2"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(05)80131-5"},{"key":"ref38","first-page":"9276","article-title":"Momentum residual neural\n                    networks","volume-title":"Proc. Int. Conf. Mach.\n                        Learn.","author":"Sander"},{"key":"ref39","volume-title":"Deep\n                        Learning","author":"Goodfellow","year":"2016"},{"key":"ref40","first-page":"5874","article-title":"Implicit Euler skip connections: Enhancing\n                        adversarial robustness via numerical stability","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3181425"},{"key":"ref42","first-page":"3276","article-title":"Beyond finite layer neural networks: Bridging deep\n                        architectures and numerical differential equations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lu"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s10851-019-00903-1"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/0041-5553(64)90137-5"},{"key":"ref45","first-page":"612","article-title":"Linearized alternating direction method with adaptive\n                        penalty for low-rank representation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lin"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1137\/15M1009597"},{"key":"ref47","article-title":"Augmented neural ODEs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Dupont"},{"key":"ref48","first-page":"11086","article-title":"Approximation capabilities of neural ODEs and\n                        invertible residual networks","volume-title":"Proc. Int.\n                        Conf. Mach. Learn.","author":"Zhang"},{"key":"ref49","article-title":"Universal approximation property of neural ordinary\n                        differential equations","author":"Teshima","year":"2020"},{"key":"ref50","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s40304-017-0103-z","article-title":"A proposal on machine learning via dynamical\n                        systems","volume":"5","author":"Weinan","year":"2017","journal-title":"Commun. Math. Statist."},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.7146\/math.scand.a-10454"},{"key":"ref52","volume-title":"Solving\n                        Ordinary Differential Equations I","author":"Wanner","year":"1993"},{"issue":"10","key":"ref53","doi-asserted-by":"crossref","DOI":"10.3390\/math7100992","article-title":"Universal function approximation by deep neural nets\n                        with bounded width and ReLU activations","volume":"7","author":"Hanin","year":"2019","journal-title":"Mathematics"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/440"},{"key":"ref55","first-page":"907","article-title":"The power of depth for feedforward neural\n                        networks","volume-title":"Proc. Conf. Learn.\n                        Theory","author":"Eldan"},{"key":"ref56","first-page":"1517","article-title":"Benefits of depth in neural\n                    networks","volume-title":"Proc. Conf. Learn. Theory","author":"Telgarsky"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/BF01744431"},{"key":"ref58","first-page":"3265","article-title":"The connection between approximation, depth\n                        separation and learnability in neural networks","volume-title":"Proc. Conf. Learn. Theory","author":"Malach"},{"key":"ref59","article-title":"Learning multiple layers of features from tiny\n                        images","author":"Krizhevsky","year":"2009"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref61","article-title":"Improved regularization of convolutional neural\n                        networks with cutout","author":"DeVries","year":"2017"},{"key":"ref62","first-page":"6105","article-title":"EfficientNet: Rethinking model scaling for\n                        convolutional neural networks","volume-title":"Proc. Int.\n                        Conf. Mach. Learn.","author":"Tan"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"ref64","first-page":"189","article-title":"Multiscale deep equilibrium\n                    models","volume-title":"Proc. Adv. Neural Inf. Process.\n                        Syst.","author":"Bai"},{"key":"ref65","article-title":"Monotone operator equilibrium\n                        networks","volume-title":"Proc. Adv. Neural Inf. Process.\n                        Syst.","author":"Winston"},{"key":"ref66","article-title":"Optimization inspired multi-branch equilibrium\n                        models","volume-title":"Proc. Int. Conf. Learn.\n                        Representations","author":"Li"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref68","first-page":"249","article-title":"Understanding the difficulty of training deep\n                        feedforward neural networks","volume-title":"Proc. Int.\n                        Conf. Artif. Intell. Statist.","author":"Glorot"},{"key":"ref69","article-title":"MobileNets: Efficient convolutional neural\n                        networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"ref70","article-title":"FractalNet: Ultra-deep neural networks without\n                        residuals","volume-title":"Proc. Int. Conf. Learn.\n                        Representations","author":"Larsson"},{"key":"ref71","first-page":"6000","article-title":"Attention is all you\n                    need","volume-title":"Proc. Adv. Neural Inf. Process.\n                        Syst.","author":"Vaswani"},{"key":"ref72","article-title":"An image is worth 16x16 words: Transformers for\n                        image recognition at scale","volume-title":"Proc. Int.\n                        Conf. Learn. Representations","author":"Dosovitskiy"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.2307\/1967124"},{"key":"ref75","volume-title":"Linear\n                        Algebra and its Applications","author":"Strang","year":"2006"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611975208"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-1799-0"},{"key":"ref78","doi-asserted-by":"crossref","DOI":"10.1007\/978-981-15-2910-8","volume-title":"Accelerated Optimization for Machine Learning - First-Order\n                        Algorithms","author":"Lin","year":"2020"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611974997"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045167"},{"key":"ref81","article-title":"Layer normalization","author":"Ba","year":"2016"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1090\/proc\/14789"},{"key":"ref83","first-page":"4922","article-title":"Approximation and non-parametric estimation of\n                        ResNet-type convolutional neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Oono"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2020.01.018"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1214\/19-aos1875"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10627928\/10477580.pdf?arnumber=10477580","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T17:45:56Z","timestamp":1723052756000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10477580\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":85,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2024.3380007","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9]]}}}