{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:40:55Z","timestamp":1740102055835,"version":"3.37.3"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"publisher","award":["2022ZD0160302"],"award-info":[{"award-number":["2022ZD0160302"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["62276004"],"award-info":[{"award-number":["62276004"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10191204","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:03Z","timestamp":1690997403000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Gradient Descent Optimizes Normalization-Free ResNets"],"prefix":"10.1109","author":[{"given":"Zongpeng","family":"Zhang","sequence":"first","affiliation":[{"name":"Academy for Advanced Interdisciplinary Studies, Peking University,Center for Data Science"}]},{"given":"Zenan","family":"Ling","sequence":"additional","affiliation":[{"name":"School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI"}]},{"given":"Tong","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI"}]},{"given":"Zhouchen","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Intelligence Science and Technology, Peking University,National Key Lab. of General AI"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2109382"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390177"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"ref6","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume-title":"International Conference on Machine Learning","author":"Ioffe","year":"2015"},{"key":"ref7","article-title":"Characterizing signal propagation to close the performance gap in unnormalized ResNets","volume-title":"International Conference on Learning Representations","author":"Brock","year":"2020"},{"key":"ref8","first-page":"342","article-title":"The shattered gradients problem: If ResNets are the answer, then what is the question?","volume-title":"International Conference on Machine Learning","author":"Balduzzi","year":"2017"},{"key":"ref9","first-page":"1951","article-title":"Transferable normalization: Towards improving transferability of deep neural networks","volume":"32","author":"Wang","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref10","article-title":"Four things everyone should know to improve batch normalization","volume-title":"International Conference on Learning Representations","author":"Summers","year":"2019"},{"key":"ref11","first-page":"2488","article-title":"How does batch normalization help optimization?","volume":"31","author":"Santurkar","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","first-page":"7705","article-title":"Understanding batch normalization","volume":"31","author":"Bjorck","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref13","article-title":"A mean field theory of batch normalization","volume-title":"International Conference on Learning Representations","author":"Yang","year":"2018"},{"key":"ref14","first-page":"19964","article-title":"Batch normalization biases residual blocks towards the identity function in deep networks","volume":"33","author":"De","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","first-page":"1352","article-title":"Rezero is all you need: Fast convergence at large depth","author":"Bachlechner","year":"2021","journal-title":"Uncertainty in Artificial Intelligence"},{"key":"ref16","article-title":"Fixup initialization: Residual learning without normalization","volume-title":"International Conference on Learning Representations","author":"Zhang","year":"2018"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05839-6"},{"key":"ref19","article-title":"Gradient descent provably optimizes over-parameterized neural networks","volume-title":"International Conference on Learning Representations","author":"Du","year":"2018"},{"key":"ref20","first-page":"11961","article-title":"Global convergence of deep networks with one wide layer followed by pyramidal topology","volume":"33","author":"Nguyen","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","article-title":"On the global convergence of training deep linear ResNets","volume-title":"International Conference on Learning Representations","author":"Zou","year":"2019"},{"key":"ref22","first-page":"242","article-title":"A convergence theory for deep learning via over-parameterization","volume-title":"International Conference on Machine Learning","author":"Allen-Zhu","year":"2019"},{"key":"ref23","article-title":"Training over-parameterized deep resnet is almost as easy as training a two-layer network","author":"Zhang","year":"2019","journal-title":"arXiv preprint"},{"key":"ref24","article-title":"Global convergence of over-parameterized deep equilibrium models","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Ling","year":"2023"},{"key":"ref25","first-page":"1675","article-title":"Gradient descent finds global minima of deep neural networks","volume-title":"International Conference on Machine Learning","author":"Du","year":"2019"},{"key":"ref26","volume":"6","author":"Sun","year":"2001","journal-title":"Matrix perturbation analysis"},{"key":"ref27","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Glorot","year":"2010"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35289-8_3"},{"key":"ref29","first-page":"7103","article-title":"Mean field residual networks: On the edge of chaos","volume":"30","author":"Yang","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2931991"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref32","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009","journal-title":"Technical report"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2023,6,18]]},"location":"Gold Coast, Australia","end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10191204.pdf?arnumber=10191204","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T04:08:56Z","timestamp":1710389336000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10191204\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10191204","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}