{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T12:44:27Z","timestamp":1777380267617,"version":"3.51.4"},"reference-count":26,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T00:00:00Z","timestamp":1775520000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["2019R1A6A1A11051177"],"award-info":[{"award-number":["2019R1A6A1A11051177"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["2021R1A2C1095703"],"award-info":[{"award-number":["2021R1A2C1095703"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["RS-2025-24523778"],"award-info":[{"award-number":["RS-2025-24523778"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Array"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.array.2026.100807","type":"journal-article","created":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:59:03Z","timestamp":1775667543000},"page":"100807","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A relaxation approach to layerwise determination of learning rates in deep neural networks"],"prefix":"10.1016","volume":"30","author":[{"given":"Chohong","family":"Min","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4712-3919","authenticated-orcid":false,"given":"Byungjoon","family":"Lee","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.array.2026.100807_b1","unstructured":"P Diederik. Kingma and Jimmy Ba. In: International conference on learning representations. 2015."},{"issue":"7","key":"10.1016\/j.array.2026.100807_b2","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J Mach Learn Res"},{"key":"10.1016\/j.array.2026.100807_b3","series-title":"An overview of gradient descent optimization algorithms","author":"Ruder","year":"2016"},{"key":"10.1016\/j.array.2026.100807_b4","series-title":"Wngrad: Learn the learning rate in gradient descent","author":"Wu","year":"2018"},{"key":"10.1016\/j.array.2026.100807_b5","series-title":"Global convergence in deep learning with variable splitting via the kurdyka-\u0142ojasiewicz property","author":"Zeng","year":"2018"},{"key":"10.1016\/j.array.2026.100807_b6","first-page":"315","article-title":"Accelerating stochastic gradient descent using predictive variance reduction","volume":"26","author":"Johnson","year":"2013","journal-title":"Adv Neural Inf Process Syst"},{"key":"10.1016\/j.array.2026.100807_b7","series-title":"International conference on machine learning","first-page":"314","article-title":"Stochastic variance reduction for nonconvex optimization","author":"Reddi","year":"2016"},{"key":"10.1016\/j.array.2026.100807_b8","series-title":"Stochastic alternating direction method of multipliers with variance reduction for nonconvex optimization","author":"Huang","year":"2016"},{"key":"10.1016\/j.array.2026.100807_b9","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","article-title":"A stochastic approximation method","author":"Robbins","year":"1951","journal-title":"Ann Math Stat"},{"key":"10.1016\/j.array.2026.100807_b10","series-title":"Numerical optimization","author":"Nocedal","year":"2006"},{"issue":"3","key":"10.1016\/j.array.2026.100807_b11","doi-asserted-by":"crossref","first-page":"334","DOI":"10.1057\/palgrave.jors.2600425","article-title":"Nonlinear programming","volume":"48","author":"Bertsekas","year":"1997","journal-title":"J Oper Res Soc"},{"key":"10.1016\/j.array.2026.100807_b12","first-page":"3732","article-title":"Painless stochastic gradient: Interpolation, line-search, and convergence rates","volume":"32","author":"Vaswani","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"issue":"1","key":"10.1016\/j.array.2026.100807_b13","first-page":"4262","article-title":"Probabilistic line searches for stochastic optimization","volume":"18","author":"Mahsereci","year":"2017","journal-title":"J Mach Learn Res"},{"key":"10.1016\/j.array.2026.100807_b14","first-page":"1","article-title":"Optimal convergence rates for convex distributed optimization in networks","volume":"20","author":"Scaman","year":"2019","journal-title":"J Mach Learn Res"},{"key":"10.1016\/j.array.2026.100807_b15","series-title":"Explaining and harnessing adversarial examples","author":"Goodfellow","year":"2014"},{"key":"10.1016\/j.array.2026.100807_b16","series-title":"2015 IEEE 14th international conference on machine learning and applications","first-page":"364","article-title":"Layer-specific adaptive learning rates for deep networks","author":"Singh","year":"2015"},{"key":"10.1016\/j.array.2026.100807_b17","series-title":"Introductory lectures on convex optimization: a basic course","author":"Nesterov","year":"2013"},{"key":"10.1016\/j.array.2026.100807_b18","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv Neural Inf Process Syst"},{"key":"10.1016\/j.array.2026.100807_b19","series-title":"The large learning rate phase of deep learning: the catapult mechanism","author":"Lewkowycz","year":"2020"},{"key":"10.1016\/j.array.2026.100807_b20","series-title":"Proceedings of the 41st international conference on machine learning","first-page":"62476","article-title":"Catapults in SGD: spikes in the training loss and their impact on generalization through feature learning","author":"Zhu","year":"2024"},{"key":"10.1016\/j.array.2026.100807_b21","series-title":"A disciplined approach to neural network hyper-parameters: Part 1\u2013learning rate, batch size, momentum, and weight decay","author":"Smith","year":"2018"},{"key":"10.1016\/j.array.2026.100807_b22","first-page":"35368","article-title":"Tensor programs V: Tuning large neural networks via zero-shot hyperparameter transfer","volume":"35","author":"Yang","year":"2022","journal-title":"Adv Neural Inf Process Syst (NeurIPS)"},{"key":"10.1016\/j.array.2026.100807_b23","series-title":"UCI machine learning repository","author":"Dua","year":"2017"},{"issue":"11","key":"10.1016\/j.array.2026.100807_b24","doi-asserted-by":"crossref","first-page":"2278","DOI":"10.1109\/5.726791","article-title":"Gradient-based learning applied to document recognition","volume":"86","author":"LeCun","year":"1998","journal-title":"Proc IEEE"},{"key":"10.1016\/j.array.2026.100807_b25","unstructured":"Martens J, Grosse R. Optimizing neural networks with Kronecker-factored approximate curvature. In: Proceedings of the 32nd international conference on machine learning. 2015, p. 2408\u201317."},{"issue":"5","key":"10.1016\/j.array.2026.100807_b26","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","article-title":"Multilayer feedforward networks are universal approximators","volume":"2","author":"Hornik","year":"1989","journal-title":"Neural Netw"}],"container-title":["Array"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S259000562600130X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S259000562600130X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T09:16:51Z","timestamp":1777367811000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S259000562600130X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":26,"alternative-id":["S259000562600130X"],"URL":"https:\/\/doi.org\/10.1016\/j.array.2026.100807","relation":{},"ISSN":["2590-0056"],"issn-type":[{"value":"2590-0056","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A relaxation approach to layerwise determination of learning rates in deep neural networks","name":"articletitle","label":"Article Title"},{"value":"Array","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.array.2026.100807","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier Inc.","name":"copyright","label":"Copyright"}],"article-number":"100807"}}