{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T20:15:27Z","timestamp":1783109727254,"version":"3.54.6"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T00:00:00Z","timestamp":1667260800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T00:00:00Z","timestamp":1667260800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012259","name":"Major Discipline Academic and Technical Leaders Training Program of Jiangxi Province","doi-asserted-by":"publisher","award":["2019h211"],"award-info":[{"award-number":["2019h211"]}],"id":[{"id":"10.13039\/501100012259","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s11063-022-11057-4","type":"journal-article","created":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T11:02:53Z","timestamp":1667300573000},"page":"4641-4659","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Fast Adaptive Online Gradient Descent Algorithm in Over-Parameterized Neural Networks"],"prefix":"10.1007","volume":"55","author":[{"given":"Anni","family":"Yang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4329-864X","authenticated-orcid":false,"given":"Dequan","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guangxiang","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,11,1]]},"reference":[{"issue":"7553","key":"11057_CR1","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521(7553):436\u2013444. https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Nature"},{"issue":"03","key":"11057_CR2","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1142\/S1793351X16500045","volume":"10","author":"X Hao","year":"2016","unstructured":"Hao X, Zhang G, Ma S (2016) Deep learning. Int J Semant Comput 10(03):417\u2013439. https:\/\/doi.org\/10.1142\/S1793351X16500045","journal-title":"Int J Semant Comput"},{"key":"11057_CR3","first-page":"1","volume":"54","author":"M Lopez-Pacheco","year":"2021","unstructured":"Lopez-Pacheco M, Yu W (2021) Complex valued deep neural networks for nonlinear system modeling. Neural Process Lett 54:1\u201322","journal-title":"Neural Process Lett"},{"issue":"1","key":"11057_CR4","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1007\/s11063-020-10238-3","volume":"52","author":"W Xu","year":"2020","unstructured":"Xu W, Parvin H, Izadparast H (2020) Deep learning neural network for unconventional images classification. Neural Process Lett 52(1):169\u2013185. https:\/\/doi.org\/10.1007\/s11063-020-10238-3","journal-title":"Neural Process Lett"},{"key":"11057_CR5","doi-asserted-by":"publisher","first-page":"1706","DOI":"10.1016\/j.procs.2018.05.144","volume":"132","author":"AR Pathak","year":"2018","unstructured":"Pathak AR, Pandey M, Rautaray S (2018) Application of deep learning for object detection. Procedia Comput Sci 132:1706\u20131717","journal-title":"Procedia Comput Sci"},{"key":"11057_CR6","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"issue":"2","key":"11057_CR7","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1109\/TPAMI.2015.2439281","volume":"38","author":"C Dong","year":"2015","unstructured":"Dong C, Loy CC, He K, Tang X (2015) Image super-resolution using deep convolutional networks. IEEE Trans Pattern Anal Mach Intell 38(2):295\u2013307","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"11057_CR8","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1","key":"11057_CR9","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1109\/MSP.2010.939038","volume":"28","author":"D Yu","year":"2010","unstructured":"Yu D, Deng L (2010) Deep learning and its applications to signal and information processing [exploratory DSP]. IEEE Signal Process Mag 28(1):145\u2013154","journal-title":"IEEE Signal Process Mag"},{"issue":"4","key":"11057_CR10","doi-asserted-by":"publisher","first-page":"697","DOI":"10.1109\/TASL.2012.2229986","volume":"21","author":"X-L Zhang","year":"2012","unstructured":"Zhang X-L, Wu J (2012) Deep belief networks based voice activity detection. IEEE Trans Audio Speech Lang Process 21(4):697\u2013710","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"11057_CR11","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"11057_CR12","doi-asserted-by":"publisher","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), pp 2261\u20132269. https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"issue":"3","key":"11057_CR13","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1109\/LLS.2016.2646498","volume":"2","author":"DP Howsmon","year":"2016","unstructured":"Howsmon DP, Hahn J (2016) Regularization techniques to overcome overparameterization of complex biochemical reaction networks. IEEE Life Sci Lett 2(3):31\u201334. https:\/\/doi.org\/10.1109\/LLS.2016.2646498","journal-title":"IEEE Life Sci Lett"},{"issue":"5","key":"11057_CR14","doi-asserted-by":"publisher","first-page":"1487","DOI":"10.13031\/2013.34909","volume":"53","author":"G Whittaker","year":"2010","unstructured":"Whittaker G, Confesor R Jr, Di Luzio M, Arnold JG (2010) Detection of overparameterization and overfitting in an automatic calibration of SWAT. Trans ASABE 53(5):1487\u20131499. https:\/\/doi.org\/10.13031\/2013.34909","journal-title":"Trans ASABE"},{"key":"11057_CR15","doi-asserted-by":"publisher","first-page":"103575","DOI":"10.1016\/j.artint.2021.103575","volume":"301","author":"C Zhang","year":"2021","unstructured":"Zhang C, Li Q (2021) Distributed optimization for degenerate loss functions arising from over-parameterization. Artif Intell 301:103575","journal-title":"Artif Intell"},{"issue":"3","key":"11057_CR16","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/3446776","volume":"64","author":"C Zhang","year":"2021","unstructured":"Zhang C, Bengio S, Hardt M, Recht B, Vinyals O (2021) Understanding deep learning (still) requires rethinking generalization. Commun ACM 64(3):107\u2013115. https:\/\/doi.org\/10.1145\/3446776","journal-title":"Commun ACM"},{"key":"11057_CR17","unstructured":"Vaswani S, Bach F, Schmidt M (2019) Fast and faster convergence of SGD for over-parameterized models and an accelerated perceptron. In: Proceedings of the twenty-second international conference on artificial intelligence and statistics, vol 89, pp 1195\u20131204"},{"key":"11057_CR18","unstructured":"Du SS, Zhai X, Poczos B, Singh A (2019) Gradient descent provably optimizes over-parameterized neural networks. In: International conference on learning representations"},{"key":"11057_CR19","unstructured":"Li Y, Liang Y (2018) Learning over-parameterized neural networks via stochastic gradient descent on structured data. Adv Neural Inf Process Syst 8168\u20138177"},{"issue":"7","key":"11057_CR20","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi J, Hazan E, Singer Y (2011) Adaptive subgradient methods for online learning and stochastic optimization. J Mach Learn Res 12(7):2121\u20132159","journal-title":"J Mach Learn Res"},{"key":"11057_CR21","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: International conference on learning representations"},{"key":"11057_CR22","first-page":"1","volume":"70","author":"L Wen","year":"2021","unstructured":"Wen L, Gao L, Li X, Zeng B (2021) Convolutional neural network with automatic learning rate scheduler for fault classification. IEEE Trans Instrum Meas 70:1\u201312","journal-title":"IEEE Trans Instrum Meas"},{"issue":"12","key":"11057_CR23","doi-asserted-by":"publisher","first-page":"12890","DOI":"10.1109\/TIE.2020.3044808","volume":"68","author":"L Wen","year":"2020","unstructured":"Wen L, Li X, Gao L (2020) A new reinforcement learning based learning rate scheduler for convolutional neural network in fault classification. IEEE Trans Ind Electron 68(12):12890\u201312900","journal-title":"IEEE Trans Ind Electron"},{"key":"11057_CR24","doi-asserted-by":"crossref","unstructured":"Han J-H, Choi D-J, Hong S-K, Kim H-S (2019) Motor fault diagnosis using CNN based deep learning algorithm considering motor rotating speed. In: 2019 IEEE 6th international conference on industrial engineering and applications (ICIEA), pp 440\u2013445","DOI":"10.1109\/IEA.2019.8714900"},{"key":"11057_CR25","unstructured":"Radhakrishnan A, Belkin M, Uhler C (2020) Linear convergence of generalized mirror descent with time-dependent mirrors. arXiv preprint arXiv:2009.08574"},{"key":"11057_CR26","doi-asserted-by":"publisher","unstructured":"Ghadimi E, Feyzmahdavian HR, Johansson M (2015) Global convergence of the heavy-ball method for convex optimization. In: 2015 European control conference (ECC), pp 310\u2013315. https:\/\/doi.org\/10.1109\/ECC.2015.7330562","DOI":"10.1109\/ECC.2015.7330562"},{"key":"11057_CR27","unstructured":"Lu Z, Xia W, Arora S, Hazan E (2022) Adaptive gradient methods with local guarantees. arXiv preprint arXiv:2203.01400"},{"key":"11057_CR28","doi-asserted-by":"publisher","DOI":"10.1201\/9781003240167","volume-title":"Stochastic optimization for large-scale machine learning","author":"VK Chauhan","year":"2021","unstructured":"Chauhan VK (2021) Stochastic optimization for large-scale machine learning. CRC Press, Boca Raton"},{"key":"11057_CR29","unstructured":"Li Y, Wei C, Ma T (2019) Towards explaining the regularization effect of initial large learning rate in training neural networks. In: Proceedings of the 33rd international conference on neural information processing systems, vol 32, pp 11674\u201311685"},{"key":"11057_CR30","first-page":"1","volume":"18","author":"M Mahsereci","year":"2017","unstructured":"Mahsereci M, Hennig P (2017) Probabilistic line searches for stochastic optimization. J Mach Learn Res 18:1\u201359","journal-title":"J Mach Learn Res"},{"issue":"3","key":"11057_CR31","doi-asserted-by":"publisher","first-page":"1380","DOI":"10.1137\/110830629","volume":"34","author":"MP Friedlander","year":"2012","unstructured":"Friedlander MP, Schmidt M (2012) Hybrid deterministic-stochastic methods for data fitting. SIAM J Sci Comput 34(3):1380\u20131405","journal-title":"SIAM J Sci Comput"},{"issue":"1","key":"11057_CR32","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/s10107-012-0572-5","volume":"134","author":"RH Byrd","year":"2012","unstructured":"Byrd RH, Chin GM, Nocedal J, Wu Y (2012) Sample size selection in optimization methods for machine learning. Math Program 134(1):127\u2013155","journal-title":"Math Program"},{"key":"11057_CR33","unstructured":"Vaswani S, Kunstner F, Laradji IH, Meng SY, Schmidt M, Lacoste-Julien S (2020) Adaptive gradient methods converge faster with over-parameterization (and you can do a line-search). arXiv preprint arXiv: 2006.06835"},{"key":"11057_CR34","unstructured":"Loizou N, Vaswani S, Hadj Laradji I, Lacoste-Julien S (2021) Stochastic Polyak step-size for SGD: an adaptive learning rate for fast convergence. 130:1306\u20131314"},{"key":"11057_CR35","unstructured":"Xiao H, Kashif\u00a0Rasul RV (2017) Fashion-mnist: a novel image dataset for benchmarking machine learning algorithms. arXiv preprint arXiv:1708.07747"},{"key":"11057_CR36","unstructured":"Lamb A, Kitamoto A, Ha D, Yamamoto K, Bober-Irizar M, Clanuwat T (2018) Deep learning for classical Japanese literature. arXiv preprint arXiv:1812.01718"},{"key":"11057_CR37","unstructured":"Krizhevsky A, Hinton G (2012) Learning multiple layers of features from tiny images. Adv Neural Inf Process Syst 1106\u20131114"},{"key":"11057_CR38","doi-asserted-by":"crossref","unstructured":"Zagoruyko S, Komodakis N (2016) Wide residual networks. In: British machine vision conference","DOI":"10.5244\/C.30.87"},{"issue":"9","key":"11057_CR39","doi-asserted-by":"publisher","first-page":"685","DOI":"10.2307\/2309989","volume":"64","author":"VV Latshaw","year":"1957","unstructured":"Latshaw VV, Dixon WJ, Massey FJ (1957) Introduction to statistical analysis. Am Math Mon 64(9):685","journal-title":"Am Math Mon"},{"key":"11057_CR40","doi-asserted-by":"publisher","unstructured":"Hazan E (2016) Introduction to online convex optimization. 2:157\u2013325. https:\/\/doi.org\/10.1561\/2400000013","DOI":"10.1561\/2400000013"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11057-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-022-11057-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11057-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T16:53:21Z","timestamp":1690822401000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-022-11057-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,1]]},"references-count":40,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["11057"],"URL":"https:\/\/doi.org\/10.1007\/s11063-022-11057-4","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,1]]},"assertion":[{"value":"16 October 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 November 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}