{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T02:02:13Z","timestamp":1771466533238,"version":"3.50.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,10,16]],"date-time":"2021-10-16T00:00:00Z","timestamp":1634342400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,10,16]],"date-time":"2021-10-16T00:00:00Z","timestamp":1634342400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"the National Key Research and Development Program of China","award":["No. 2018YFB0204301"],"award-info":[{"award-number":["No. 2018YFB0204301"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2022,4]]},"DOI":"10.1007\/s11063-021-10658-9","type":"journal-article","created":{"date-parts":[[2021,10,16]],"date-time":"2021-10-16T19:55:23Z","timestamp":1634414123000},"page":"803-816","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["An Adaptive Learning Rate Schedule for SIGNSGD Optimizer in Neural Networks"],"prefix":"10.1007","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6566-835X","authenticated-orcid":false,"given":"Kang","family":"Wang","sequence":"first","affiliation":[]},{"given":"Tao","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Dou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,16]]},"reference":[{"key":"10658_CR1","doi-asserted-by":"crossref","unstructured":"Bengio Y (2012) Practical recommendations for gradient-based training of deep architectures. In: Neural networks: tricks of the trade. Springer, pp 437\u2013478","DOI":"10.1007\/978-3-642-35289-8_26"},{"key":"10658_CR2","unstructured":"Bernstein J, Wang YX, Azizzadenesheli K, Anandkumar A (2018a) SIGNSGD: compressed optimisation for non-convex problems. In: International conference on machine learning, PMLR, pp 560\u2013569"},{"key":"10658_CR3","unstructured":"Bernstein J, Zhao J, Azizzadenesheli K, Anandkumar A (2018b) SIGNSGD with majority vote is communication efficient and fault tolerant. ArXiv preprint arXiv:1810.05291"},{"issue":"2","key":"10658_CR4","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1137\/16M1080173","volume":"60","author":"L Bottou","year":"2018","unstructured":"Bottou L, Curtis FE, Nocedal J (2018) Optimization methods for large-scale machine learning. SIAM Rev 60(2):223\u2013311","journal-title":"SIAM Rev"},{"issue":"6","key":"10658_CR5","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1109\/MSP.2012.2211477","volume":"29","author":"L Deng","year":"2012","unstructured":"Deng L (2012) The mnist database of handwritten digit images for machine learning research [best of the web]. IEEE Signal Process Maga 29(6):141\u2013142","journal-title":"IEEE Signal Process Maga"},{"key":"10658_CR6","doi-asserted-by":"crossref","unstructured":"Deng L, Li J, Huang JT, Yao K, Yu D, Seide F, Seltzer M, Zweig G, He X, Williams J et al (2013) Recent advances in deep learning for speech research at microsoft. In: 2013 IEEE international conference on acoustics. Speech and signal processing. IEEE, pp 8604\u20138608","DOI":"10.1109\/ICASSP.2013.6639345"},{"key":"10658_CR7","unstructured":"Duchi J, Hazan E, Singer Y (2011) Adaptive subgradient methods for online learning and stochastic optimization. J Mach Learn Res 12(7):257\u2013269"},{"issue":"8","key":"10658_CR8","doi-asserted-by":"publisher","first-page":"1915","DOI":"10.1109\/TPAMI.2012.231","volume":"35","author":"C Farabet","year":"2012","unstructured":"Farabet C, Couprie C, Najman L, LeCun Y (2012) Learning hierarchical features for scene labeling. IEEE Trans Pattern Anal Mach Intell 35(8):1915\u20131929","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10658_CR9","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"10658_CR10","unstructured":"Gower RM, Loizou N, Qian X, Sailanbayev A, Shulgin E, Richt\u00e1rik P (2019) SGD: general analysis and improved rates. In: International conference on machine learning, PMLR, pp 5200\u20135209"},{"key":"10658_CR11","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"10658_CR12","unstructured":"Hu W, Zhu Z, Xiong H, Huan J (2019) Quasi-potential as an implicit regularizer for the loss function in the stochastic gradient descent. ArXiv preprint arXiv:1901.06054"},{"key":"10658_CR13","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. ArXiv preprint arXiv:1412.6980"},{"issue":"3","key":"10658_CR14","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1007\/s00779-019-01238-9","volume":"23","author":"F Kong","year":"2019","unstructured":"Kong F (2019) Facial expression recognition method based on deep convolutional neural network combined with improved LBP features. Pers Ubiquit Comput 23(3):531\u2013539","journal-title":"Pers Ubiquit Comput"},{"key":"10658_CR15","unstructured":"Krizhevsky A, Hinton G et al (2009) Learning multiple layers of features from tiny images. Handb Syst Autoimmun Dis 1(4)"},{"key":"10658_CR16","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inf Process Syst 25:1097\u20131105","journal-title":"Adv Neural Inf Process Syst"},{"issue":"7553","key":"10658_CR17","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521(7553):436\u2013444","journal-title":"Nature"},{"key":"10658_CR18","doi-asserted-by":"crossref","unstructured":"Liu C, Gardner SJ, Wen N, Elshaikh MA, Siddiqui F, Movsas B, Chetty IJ (2019) Automatic segmentation of the prostate on CT images using deep neural networks (DNN). Int J Radiat Oncol* Biol* Phys 104(4):924\u2013932","DOI":"10.1016\/j.ijrobp.2019.03.017"},{"key":"10658_CR19","unstructured":"Loshchilov I, Hutter F (2016) SGDR: stochastic gradient descent with warm restarts. ArXiv preprint arXiv:1608.03983"},{"key":"10658_CR20","unstructured":"Mandt S, Hoffman M, Blei D (2016) A variational analysis of stochastic gradient algorithms. In: International conference on machine learning, PMLR, pp 354\u2013363"},{"issue":"2","key":"10658_CR21","doi-asserted-by":"publisher","first-page":"135","DOI":"10.15625\/1813-9663\/35\/2\/13315","volume":"35","author":"CC Nguyen","year":"2019","unstructured":"Nguyen CC, Tran GS, Nghiem TP, Burie JC, Luong CM (2019) Real-time smile detection using deep learning. J Comput Sci Cybern 35(2):135\u2013145","journal-title":"J Comput Sci Cybern"},{"issue":"5","key":"10658_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","volume":"4","author":"BT Polyak","year":"1964","unstructured":"Polyak BT (1964) Some methods of speeding up the convergence of iteration methods. USSR Comput Math Math Phys 4(5):1\u201317","journal-title":"USSR Comput Math Math Phys"},{"key":"10658_CR23","doi-asserted-by":"crossref","unstructured":"Robbins H, Monro S (1951) A stochastic approximation method. Ann Math Stat 22(3):400\u2013407","DOI":"10.1214\/aoms\/1177729586"},{"key":"10658_CR24","unstructured":"Sermanet P, Eigen D, Zhang X, Mathieu M, Fergus R, LeCun Y (2013) Overfeat: integrated recognition, localization and detection using convolutional networks. ArXiv preprint arXiv:1312.6229"},{"key":"10658_CR25","unstructured":"Shu J, Zhu Y, Zhao Q, Meng D, Xu Z (2020) Meta-LR-schedule-net: learned LR schedules that scale and generalize. ArXiv preprint arXiv:2007.14546"},{"key":"10658_CR26","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. ArXiv preprint arXiv:1409.1556"},{"key":"10658_CR27","doi-asserted-by":"crossref","unstructured":"Smith LN (2017) Cyclical learning rates for training neural networks. In: 2017 IEEE winter conference on applications of computer vision (WACV). IEEE, pp 464\u2013472","DOI":"10.1109\/WACV.2017.58"},{"issue":"12","key":"10658_CR28","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1109\/JPROC.2017.2761740","volume":"105","author":"V Sze","year":"2017","unstructured":"Sze V, Chen YH, Yang TJ, Emer JS (2017) Efficient processing of deep neural networks: a tutorial and survey. Proc IEEE 105(12):2295\u20132329","journal-title":"Proc IEEE"},{"key":"10658_CR29","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"10658_CR30","unstructured":"Tieleman T, Hinton G (2012) Lecture 6.5-rmsprop: divide the gradient by a running average of its recent magnitude. COURSERA: Neural Netw Mach Learn 4(2):26\u201331"},{"key":"10658_CR31","unstructured":"Tompson J, Jain A, LeCun Y, Bregler C (2014) Joint training of a convolutional network and a graphical model for human pose estimation. ArXiv preprint arXiv:1406.2984"},{"key":"10658_CR32","doi-asserted-by":"crossref","unstructured":"Tompson J, Goroshin R, Jain A, LeCun Y, Bregler C (2015) Efficient object localization using convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 648\u2013656","DOI":"10.1109\/CVPR.2015.7298664"},{"issue":"2","key":"10658_CR33","doi-asserted-by":"publisher","first-page":"506","DOI":"10.1137\/S1052623495294797","volume":"8","author":"P Tseng","year":"1998","unstructured":"Tseng P (1998) An incremental gradient (-projection) method with momentum term and adaptive stepsize rule. SIAM J Optim 8(2):506\u2013531","journal-title":"SIAM J Optim"},{"key":"10658_CR34","unstructured":"Xu Z, Dai AM, Kemp J, Metz L (2019) Learning an adaptive learning rate schedule. ArXiv preprint arXiv:1909.09712"},{"key":"10658_CR35","unstructured":"Zeiler MD (2012) Adadelta: an adaptive learning rate method. ArXiv preprint arXiv:1212.5701"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-021-10658-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-021-10658-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-021-10658-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,9]],"date-time":"2022-04-09T17:07:04Z","timestamp":1649524024000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-021-10658-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,16]]},"references-count":35,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,4]]}},"alternative-id":["10658"],"URL":"https:\/\/doi.org\/10.1007\/s11063-021-10658-9","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,10,16]]},"assertion":[{"value":"5 October 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Decalarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}