{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T23:11:47Z","timestamp":1773011507155,"version":"3.50.1"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,6,6]],"date-time":"2022-06-06T00:00:00Z","timestamp":1654473600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,6]],"date-time":"2022-06-06T00:00:00Z","timestamp":1654473600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61472003"],"award-info":[{"award-number":["61472003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s10489-022-03629-7","type":"journal-article","created":{"date-parts":[[2022,6,6]],"date-time":"2022-06-06T17:02:51Z","timestamp":1654534971000},"page":"4099-4108","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["A fast adaptive algorithm for training deep neural networks"],"prefix":"10.1007","volume":"53","author":[{"given":"Yangting","family":"Gui","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4329-864X","authenticated-orcid":false,"given":"Dequan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Runyue","family":"Fang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,6]]},"reference":[{"key":"3629_CR1","unstructured":"Li W, Zhang Z, Wang X, Adax PL (2020) Adaptive gradient descent with exponential long term memory. arXiv:2004.09740"},{"key":"3629_CR2","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1016\/j.procs.2018.05.198","volume":"132","author":"N Sharma","year":"2018","unstructured":"Sharma N, Jain V, Mishra A (2018) An analysis of convolutional neural networks for image classification. Procedia Comput Sci 132:377\u2013384","journal-title":"Procedia Comput Sci"},{"issue":"12","key":"3629_CR3","doi-asserted-by":"publisher","first-page":"8810","DOI":"10.1007\/s10489-021-02297-3","volume":"51","author":"W Zhao","year":"2021","unstructured":"Zhao W, Lou M, Qi Y, Wang Y, Xu C, Deng X, Ma. Y (2021) Adaptive channel and multiscale spatial context network for breast mass segmentation in full-field mammograms. Applied Intelligence 51(12):8810\u20138827","journal-title":"Applied Intelligence"},{"issue":"11","key":"3629_CR4","doi-asserted-by":"publisher","first-page":"7781","DOI":"10.1007\/s10489-020-02115-2","volume":"51","author":"P Tian","year":"2021","unstructured":"Tian P, Mo H, Jiang L (2021) Scene graph generation by multi-level semantic tasks. Applied Intelligence, 51(11):7781\u20137793","journal-title":"Applied Intelligence,"},{"key":"3629_CR5","unstructured":"Anup KG, Puneet G, Esa R (2021) Fatalread-fooling visual speech recognition models"},{"key":"3629_CR6","doi-asserted-by":"crossref","unstructured":"Robbins H, Monro S (1951) A stochastic approximation method. The annals of mathematical statistics pages 400\u2013407","DOI":"10.1214\/aoms\/1177729586"},{"key":"3629_CR7","first-page":"543","volume":"269","author":"Y Nesterov","year":"1983","unstructured":"Nesterov Y (1983) A method for unconstrained convex minimization problem with the rate of convergence o (1\/k\u02c6 2). In Doklady an ussr, 269:543\u2013547","journal-title":"In Doklady an ussr,"},{"key":"3629_CR8","unstructured":"Sutskever I, Martens J, Dahl G, Hinton G (2013) On the importance of initialization and momentum in deep learning. In: International conference on machine learning, pages 1139\u20131147. PMLR"},{"key":"3629_CR9","first-page":"7","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi J, Hazan E, Singer Y (2011) Adaptive subgradient methods for online learning and stochastic optimization. Journal of machine learning research 12:7","journal-title":"Journal of machine learning research"},{"key":"3629_CR10","unstructured":"Matthew DZ (2012) Adadelta: an adaptive learning rate method. arXiv:1212.5701"},{"issue":"2","key":"3629_CR11","first-page":"26","volume":"4","author":"T Tijmen","year":"2012","unstructured":"Tijmen T., Geoffrey H., et al. (2012) Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude. COURSERA: Neural networks for machine learning 4(2):26\u2013 31","journal-title":"COURSERA: Neural networks for machine learning"},{"key":"3629_CR12","unstructured":"Kingma DP, Adam JB (2014) A method for stochastic optimization. arXiv:1412.6980"},{"key":"3629_CR13","unstructured":"Timothy D (2016) Incorporating nesterov momentum into adam"},{"key":"3629_CR14","unstructured":"Reddi SJ, Kale S, Kumar S (2019) On the convergence of adam and beyond. arXiv:1904.09237"},{"key":"3629_CR15","unstructured":"Wilson AC, Roelofs R, Stern M, Srebro N, Recht B (2017) The marginal value of adaptive gradient methods in machine learning"},{"key":"3629_CR16","unstructured":"Luo L, Xiong Y, Liu Y, Sun XU (2019) Adaptive gradient methods with dynamic bound of learning rate. arXiv:1902.09843"},{"issue":"5","key":"3629_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","volume":"4","author":"TP Boris","year":"1964","unstructured":"Boris TP (1964) Some methods of speeding up the convergence of iteration methods. Ussr computational mathematics and mathematical physics 4(5):1\u201317","journal-title":"Ussr computational mathematics and mathematical physics"},{"key":"3629_CR18","first-page":"18795","volume":"33","author":"J Zhuang","year":"2020","unstructured":"Zhuang J, Tang T, Ding Y, Tatikonda SC , Dvornek N, Papademetris X, Duncan J (2020) Adabelief optimizer: Adapting stepsizes by the belief in observed gradients. Adv Neural Inf Process Syst 33:18795\u201318806","journal-title":"Adv Neural Inf Process Syst"},{"key":"3629_CR19","unstructured":"Hazan E (2019) Introduction to online convex optimization. arXiv:1909.05207"},{"key":"3629_CR20","unstructured":"Zinkevich M (2003) Online convex programming and generalized infinitesimal gradient ascent. In: Proceedings of the 20th international conference on machine learning (icml-03), 928\u2013936"},{"key":"3629_CR21","unstructured":"LeCun Y (1998) The mnist database of handwritten digits. http:\/\/yann.lecuncom\/exdb\/mnist\/"},{"key":"3629_CR22","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. arXiv:1711.05101"},{"key":"3629_CR23","unstructured":"Xiao H, Rasul K, Vollgraf R (2017) Fashion-mnist: a novel image dataset for benchmarking machine learning algorithms. arXiv:1708.07747"},{"key":"3629_CR24","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition","DOI":"10.1109\/CVPR.2016.90"},{"key":"3629_CR25","unstructured":"Krizhevsky A, Hinton G (2009) Learning multiple layers of features from tiny images"},{"issue":"1","key":"3629_CR26","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham M, Eslami SM , Gool LV, Williams CKI, Winn J, Zisserman A (2015) The pascal visual object classes challenge: A retrospective. International journal of computer vision 111(1):98\u2013136","journal-title":"International journal of computer vision"},{"key":"3629_CR27","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"3629_CR28","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556"},{"key":"3629_CR29","unstructured":"Li H, Xu Z, Taylor G, Studer C, Goldstein T (2018) Visualizing the loss landscape of neural nets"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03629-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03629-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03629-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T06:45:30Z","timestamp":1675233930000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03629-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,6]]},"references-count":29,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["3629"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03629-7","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,6]]},"assertion":[{"value":"12 April 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 June 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}